LLVM 14.0.0
DAGCombiner.cpp
Go to the documentation of this file.
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SetVector.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/LLVMContext.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <string>
75#include <tuple>
76#include <utility>
77
78using namespace llvm;
79
80#define DEBUG_TYPE "dagcombine"
81
82STATISTIC(NodesCombined , "Number of dag nodes combined");
83STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87STATISTIC(SlicedLoads, "Number of load sliced");
88STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89
90static cl::opt<bool>
91CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92 cl::desc("Enable DAG combiner's use of IR alias analysis"));
93
94static cl::opt<bool>
95UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96 cl::desc("Enable DAG combiner's use of TBAA"));
97
98#ifndef NDEBUG
100CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101 cl::desc("Only use DAG-combiner alias analysis in this"
102 " function"));
103#endif
104
105/// Hidden option to stress test load slicing, i.e., when this option
106/// is enabled, load slicing bypasses most of its profitability guards.
107static cl::opt<bool>
108StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109 cl::desc("Bypass the profitability model of load slicing"),
110 cl::init(false));
111
112static cl::opt<bool>
113 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114 cl::desc("DAG combiner may split indexing from loads"));
115
116static cl::opt<bool>
117 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118 cl::desc("DAG combiner enable merging multiple stores "
119 "into a wider store"));
120
122 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123 cl::desc("Limit the number of operands to inline for Token Factors"));
124
126 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127 cl::desc("Limit the number of times for the same StoreNode and RootNode "
128 "to bail out in store merging dependence check"));
129
131 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132 cl::desc("DAG combiner enable reducing the width of load/op/store "
133 "sequence"));
134
136 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137 cl::desc("DAG combiner enable load/<replace bytes>/store with "
138 "a narrower store"));
139
140namespace {
141
142 class DAGCombiner {
143 SelectionDAG &DAG;
144 const TargetLowering &TLI;
145 const SelectionDAGTargetInfo *STI;
147 CodeGenOpt::Level OptLevel;
148 bool LegalDAG = false;
149 bool LegalOperations = false;
150 bool LegalTypes = false;
151 bool ForCodeSize;
152 bool DisableGenericCombines;
153
154 /// Worklist of all of the nodes that need to be simplified.
155 ///
156 /// This must behave as a stack -- new nodes to process are pushed onto the
157 /// back and when processing we pop off of the back.
158 ///
159 /// The worklist will not contain duplicates but may contain null entries
160 /// due to nodes being deleted from the underlying DAG.
162
163 /// Mapping from an SDNode to its position on the worklist.
164 ///
165 /// This is used to find and remove nodes from the worklist (by nulling
166 /// them) when they are deleted from the underlying DAG. It relies on
167 /// stable indices of nodes within the worklist.
169 /// This records all nodes attempted to add to the worklist since we
170 /// considered a new worklist entry. As we keep do not add duplicate nodes
171 /// in the worklist, this is different from the tail of the worklist.
173
174 /// Set of nodes which have been combined (at least once).
175 ///
176 /// This is used to allow us to reliably add any operands of a DAG node
177 /// which have not yet been combined to the worklist.
178 SmallPtrSet<SDNode *, 32> CombinedNodes;
179
180 /// Map from candidate StoreNode to the pair of RootNode and count.
181 /// The count is used to track how many times we have seen the StoreNode
182 /// with the same RootNode bail out in dependence check. If we have seen
183 /// the bail out for the same pair many times over a limit, we won't
184 /// consider the StoreNode with the same RootNode as store merging
185 /// candidate again.
187
188 // AA - Used for DAG load/store alias analysis.
189 AliasAnalysis *AA;
190
191 /// When an instruction is simplified, add all users of the instruction to
192 /// the work lists because they might get more simplified now.
194 for (SDNode *Node : N->uses())
195 AddToWorklist(Node);
196 }
197
198 /// Convenient shorthand to add a node and all of its user to the worklist.
201 AddToWorklist(N);
202 }
203
204 // Prune potentially dangling nodes. This is called after
205 // any visit to a node, but should also be called during a visit after any
206 // failed combine which may have created a DAG node.
208 // Check any nodes added to the worklist to see if they are prunable.
209 while (!PruningList.empty()) {
210 auto *N = PruningList.pop_back_val();
211 if (N->use_empty())
212 recursivelyDeleteUnusedNodes(N);
213 }
214 }
215
217 // Before we do any work, remove nodes that are not in use.
219 SDNode *N = nullptr;
220 // The Worklist holds the SDNodes in order, but it may contain null
221 // entries.
222 while (!N && !Worklist.empty()) {
223 N = Worklist.pop_back_val();
224 }
225
226 if (N) {
227 bool GoodWorklistEntry = WorklistMap.erase(N);
230 "Found a worklist entry without a corresponding map entry!");
231 }
232 return N;
233 }
234
235 /// Call the node-specific routine that folds each particular type of node.
236 SDValue visit(SDNode *N);
237
238 public:
240 : DAG(D), TLI(D.getTargetLoweringInfo()),
241 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
242 ForCodeSize = DAG.shouldOptForSize();
243 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
244
245 MaximumLegalStoreInBits = 0;
246 // We use the minimum store size here, since that's all we can guarantee
247 // for the scalable vector types.
248 for (MVT VT : MVT::all_valuetypes())
249 if (EVT(VT).isSimple() && VT != MVT::Other &&
250 TLI.isTypeLegal(EVT(VT)) &&
251 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
252 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
253 }
254
256 // Mark this for potential pruning.
257 PruningList.insert(N);
258 }
259
260 /// Add to the worklist making sure its instance is at the back (next to be
261 /// processed.)
262 void AddToWorklist(SDNode *N) {
263 assert(N->getOpcode() != ISD::DELETED_NODE &&
264 "Deleted Node added to Worklist");
265
266 // Skip handle nodes as they can't usefully be combined and confuse the
267 // zero-use deletion strategy.
268 if (N->getOpcode() == ISD::HANDLENODE)
269 return;
270
272
273 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
274 Worklist.push_back(N);
275 }
276
277 /// Remove all instances of N from the worklist.
279 CombinedNodes.erase(N);
280 PruningList.remove(N);
281 StoreRootCountMap.erase(N);
282
283 auto It = WorklistMap.find(N);
284 if (It == WorklistMap.end())
285 return; // Not in the worklist.
286
287 // Null out the entry rather than erasing it to avoid a linear operation.
288 Worklist[It->second] = nullptr;
289 WorklistMap.erase(It);
290 }
291
293 bool recursivelyDeleteUnusedNodes(SDNode *N);
294
295 /// Replaces all uses of the results of one DAG node with new values.
296 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
297 bool AddTo = true);
298
299 /// Replaces all uses of the results of one DAG node with new values.
300 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
301 return CombineTo(N, &Res, 1, AddTo);
302 }
303
304 /// Replaces all uses of the results of one DAG node with new values.
305 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
306 bool AddTo = true) {
307 SDValue To[] = { Res0, Res1 };
308 return CombineTo(N, To, 2, AddTo);
309 }
310
311 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
312
313 private:
314 unsigned MaximumLegalStoreInBits;
315
316 /// Check the specified integer node value to see if it can be simplified or
317 /// if things it uses can be simplified by bit propagation.
318 /// If so, return true.
319 bool SimplifyDemandedBits(SDValue Op) {
320 unsigned BitWidth = Op.getScalarValueSizeInBits();
322 return SimplifyDemandedBits(Op, DemandedBits);
323 }
324
325 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
326 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
327 KnownBits Known;
328 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
329 return false;
330
331 // Revisit the node.
332 AddToWorklist(Op.getNode());
333
334 CommitTargetLoweringOpt(TLO);
335 return true;
336 }
337
338 /// Check the specified vector node value to see if it can be simplified or
339 /// if things it uses can be simplified as it only uses some of the
340 /// elements. If so, return true.
341 bool SimplifyDemandedVectorElts(SDValue Op) {
342 // TODO: For now just pretend it cannot be simplified.
343 if (Op.getValueType().isScalableVector())
344 return false;
345
346 unsigned NumElts = Op.getValueType().getVectorNumElements();
348 return SimplifyDemandedVectorElts(Op, DemandedElts);
349 }
350
351 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
352 const APInt &DemandedElts,
353 bool AssumeSingleUse = false);
354 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
355 bool AssumeSingleUse = false);
356
360 bool SliceUpLoad(SDNode *N);
361
362 // Scalars have size 0 to distinguish from singleton vectors.
366
367 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
368 /// load.
369 ///
370 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
371 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
372 /// \param EltNo index of the vector element to load.
373 /// \param OriginalLoad load that EVE came from to be replaced.
374 /// \returns EVE on success SDValue() on failure.
385 bool PromoteLoad(SDValue Op);
386
387 /// Call the node-specific routine that knows how to fold each
388 /// particular type of node. If that doesn't do anything, try the
389 /// target-specific DAG combines.
391
392 // Visitation implementation - Implement dag node combining for different
393 // node types. The semantics are as follows:
394 // Return Value:
395 // SDValue.getNode() == 0 - No change was made
396 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
397 // otherwise - N should be replaced by the returned Operand.
398 //
494
497
516
520
522 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
523 const SDLoc &DL, SDValue N0,
524 SDValue N1);
525 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
526 SDValue N1);
527 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
528 SDValue N1, SDNodeFlags Flags);
529
531
535 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
539 SDValue N2, SDValue N3, ISD::CondCode CC,
540 bool NotExtCompare = false);
542 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
543 ISD::CondCode CC);
546 SDValue N2, SDValue N3, ISD::CondCode CC);
550 const SDLoc &DL);
554 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
555 const SDLoc &DL, bool foldBooleans);
557
558 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
559 SDValue &CC, bool MatchStrict = false) const;
560 bool isOneUseSetCC(SDValue N) const;
561
563 unsigned HiOp);
567 SDValue combineRepeatedFPDivisors(SDNode *N);
570 SDValue BuildSDIV(SDNode *N);
571 SDValue BuildSDIVPow2(SDNode *N);
572 SDValue BuildUDIV(SDNode *N);
578 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
579 SDNodeFlags Flags, bool Reciprocal);
580 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
581 SDNodeFlags Flags, bool Reciprocal);
583 bool DemandHighBits = true);
587 unsigned PosOpcode, unsigned NegOpcode,
588 const SDLoc &DL);
591 unsigned PosOpcode, unsigned NegOpcode,
592 const SDLoc &DL);
593 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
606 SDValue VecIn2, unsigned LeftIdx,
607 bool DidSplitVec);
609
610 /// Walk up chain skipping non-aliasing memory nodes,
611 /// looking for aliasing nodes and adding them to the Aliases vector.
613 SmallVectorImpl<SDValue> &Aliases);
614
615 /// Return true if there is any possibility that the two addresses overlap.
616 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
617
618 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
619 /// chain (aliasing node.)
621
622 /// Try to replace a store and any possibly adjacent stores on
623 /// consecutive chains with better chains. Return true only if St is
624 /// replaced.
625 ///
626 /// Notice that other chains may still be replaced even if the function
627 /// returns false.
629
630 // Helper for findBetterNeighborChains. Walk up store chain add additional
631 // chained stores that do not overlap and can be parallelized.
633
    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr. (Presumably in bytes — the consumers use
      // an ElementSizeBytes quantity; confirm at the use sites.)
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };
646
647 // Classify the origin of a stored value.
648 enum class StoreSource { Unknown, Constant, Extract, Load };
649 StoreSource getStoreSource(SDValue StoreVal) {
650 switch (StoreVal.getOpcode()) {
651 case ISD::Constant:
652 case ISD::ConstantFP:
653 return StoreSource::Constant;
656 return StoreSource::Extract;
657 case ISD::LOAD:
658 return StoreSource::Load;
659 default:
660 return StoreSource::Unknown;
661 }
662 }
663
664 /// This is a helper function for visitMUL to check the profitability
665 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
666 /// MulNode is the original multiply, AddNode is (add x, c1),
667 /// and ConstNode is c2.
668 bool isMulAddWithConstProfitable(SDNode *MulNode,
671
672 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
673 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
674 /// the type of the loaded value to be extended.
677
678 /// Helper function to calculate whether the given Load/Store can have its
679 /// width reduced to ExtVT.
681 EVT &MemVT, unsigned ShAmt = 0);
682
683 /// Used by BackwardsPropagateMask to find suitable loads.
687 /// Attempt to propagate a given AND node back to load leaves so that they
688 /// can be combined into narrow loads.
690
691 /// Helper function for mergeConsecutiveStores which merges the component
692 /// store chains.
694 unsigned NumStores);
695
696 /// This is a helper function for mergeConsecutiveStores. When the source
697 /// elements of the consecutive stores are all constants or all extracted
698 /// vector elements, try to merge them into one larger store introducing
699 /// bitcasts if necessary. \return True if a merged store was created.
701 EVT MemVT, unsigned NumStores,
702 bool IsConstantSrc, bool UseVector,
703 bool UseTrunc);
704
705 /// This is a helper function for mergeConsecutiveStores. Stores that
706 /// potentially may be merged with St are placed in StoreNodes. RootNode is
707 /// a chain predecessor to all store candidates.
710 SDNode *&Root);
711
712 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
713 /// have indirect dependency through their operands. RootNode is the
714 /// predecessor to all stores calculated by getStoreMergeCandidates and is
715 /// used to prune the dependency check. \return True if safe to merge.
717 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
718 SDNode *RootNode);
719
720 /// This is a helper function for mergeConsecutiveStores. Given a list of
721 /// store candidates, find the first N that are consecutive in memory.
722 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
724 int64_t ElementSizeBytes) const;
725
726 /// This is a helper function for mergeConsecutiveStores. It is used for
727 /// store chains that are composed entirely of constant values.
729 unsigned NumConsecutiveStores,
730 EVT MemVT, SDNode *Root, bool AllowVectors);
731
732 /// This is a helper function for mergeConsecutiveStores. It is used for
733 /// store chains that are composed entirely of extracted vector elements.
734 /// When extracting multiple vector elements, try to store them in one
735 /// vector store rather than a sequence of scalar stores.
738 SDNode *Root);
739
740 /// This is a helper function for mergeConsecutiveStores. It is used for
741 /// store chains that are composed entirely of loaded values.
744 SDNode *Root, bool AllowVectors,
746
747 /// Merge consecutive store operations into a wide store.
748 /// This optimization uses wide integers or vectors when possible.
749 /// \return true if stores were merged.
751
752 /// Try to transform a truncation where C is a constant:
753 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
754 ///
755 /// \p N needs to be a truncation and its first operand an AND. Other
756 /// requirements are checked by the function (e.g. that trunc is
757 /// single-use) and if missed an empty SDValue is returned.
759
760 /// Helper function to determine whether the target supports operation
761 /// given by \p Opcode for type \p VT, that is, whether the operation
762 /// is legal or custom before legalizing operations, and whether is
763 /// legal (but not custom) after legalization.
764 bool hasOperation(unsigned Opcode, EVT VT) {
765 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
766 }
767
768 public:
769 /// Runs the dag combiner on all nodes in the work list
770 void Run(CombineLevel AtLevel);
771
    /// Accessor for the DAG this combiner operates on.
    SelectionDAG &getDAG() const { return DAG; }
773
774 /// Returns a type large enough to hold any valid shift amount - before type
775 /// legalization these can be huge.
776 EVT getShiftAmountTy(EVT LHSTy) {
777 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
778 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
779 }
780
781 /// This method returns true if we are running before type legalization or
782 /// if the specified VT is legal.
783 bool isTypeLegal(const EVT &VT) {
784 if (!LegalTypes) return true;
785 return TLI.isTypeLegal(VT);
786 }
787
788 /// Convenience wrapper around TargetLowering::getSetCCResultType
789 EVT getSetCCResultType(EVT VT) const {
790 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
791 }
792
795 ISD::NodeType ExtType);
796 };
797
798/// This class is a DAGUpdateListener that removes any deleted
799/// nodes from the worklist.
800class WorklistRemover : public SelectionDAG::DAGUpdateListener {
801 DAGCombiner &DC;
802
803public:
804 explicit WorklistRemover(DAGCombiner &dc)
805 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
806
807 void NodeDeleted(SDNode *N, SDNode *E) override {
808 DC.removeFromWorklist(N);
809 }
810};
811
812class WorklistInserter : public SelectionDAG::DAGUpdateListener {
813 DAGCombiner &DC;
814
815public:
816 explicit WorklistInserter(DAGCombiner &dc)
817 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
818
819 // FIXME: Ideally we could add N to the worklist, but this causes exponential
820 // compile time costs in large DAGs, e.g. Halide.
821 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
822};
823
824} // end anonymous namespace
825
826//===----------------------------------------------------------------------===//
827// TargetLowering::DAGCombinerInfo implementation
828//===----------------------------------------------------------------------===//
829
831 ((DAGCombiner*)DC)->AddToWorklist(N);
832}
833
836 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
837}
838
840CombineTo(SDNode *N, SDValue Res, bool AddTo) {
841 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
842}
843
846 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
847}
848
851 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
852}
853
856 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
857}
858
859//===----------------------------------------------------------------------===//
860// Helper Functions
861//===----------------------------------------------------------------------===//
862
863void DAGCombiner::deleteAndRecombine(SDNode *N) {
865
866 // If the operands of this node are only used by the node, they will now be
867 // dead. Make sure to re-visit them and recursively delete dead nodes.
868 for (const SDValue &Op : N->ops())
869 // For an operand generating multiple values, one of the values may
870 // become dead allowing further simplification (e.g. split index
871 // arithmetic from an indexed load).
872 if (Op->hasOneUse() || Op->getNumValues() > 1)
873 AddToWorklist(Op.getNode());
874
875 DAG.DeleteNode(N);
876}
877
878// APInts must be the same size for most operations, this helper
879// function zero extends the shorter of the pair so that they match.
880// We provide an Offset so that we can create bitwidths that won't overflow.
881static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
882 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
883 LHS = LHS.zextOrSelf(Bits);
884 RHS = RHS.zextOrSelf(Bits);
885}
886
887// Return true if this node is a setcc, or is a select_cc
888// that selects between the target values used for true and false, making it
889// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
890// the appropriate nodes based on the type of node we are checking. This
891// simplifies life a bit for the callers.
892bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
893 SDValue &CC, bool MatchStrict) const {
894 if (N.getOpcode() == ISD::SETCC) {
895 LHS = N.getOperand(0);
896 RHS = N.getOperand(1);
897 CC = N.getOperand(2);
898 return true;
899 }
900
901 if (MatchStrict &&
902 (N.getOpcode() == ISD::STRICT_FSETCC ||
903 N.getOpcode() == ISD::STRICT_FSETCCS)) {
904 LHS = N.getOperand(1);
905 RHS = N.getOperand(2);
906 CC = N.getOperand(3);
907 return true;
908 }
909
910 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
911 !TLI.isConstFalseVal(N.getOperand(3)))
912 return false;
913
914 if (TLI.getBooleanContents(N.getValueType()) ==
916 return false;
917
918 LHS = N.getOperand(0);
919 RHS = N.getOperand(1);
920 CC = N.getOperand(4);
921 return true;
922}
923
924/// Return true if this is a SetCC-equivalent operation with only one use.
925/// If this is true, it allows the users to invert the operation for free when
926/// it is profitable to do so.
927bool DAGCombiner::isOneUseSetCC(SDValue N) const {
928 SDValue N0, N1, N2;
929 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
930 return true;
931 return false;
932}
933
935 if (!ScalarTy.isSimple())
936 return false;
937
939 switch (ScalarTy.getSimpleVT().SimpleTy) {
940 case MVT::i8:
941 MaskForTy = 0xFFULL;
942 break;
943 case MVT::i16:
944 MaskForTy = 0xFFFFULL;
945 break;
946 case MVT::i32:
947 MaskForTy = 0xFFFFFFFFULL;
948 break;
949 default:
950 return false;
951 break;
952 }
953
954 APInt Val;
956 return Val.getLimitedValue() == MaskForTy;
957
958 return false;
959}
960
961// Determines if it is a constant integer or a splat/build vector of constant
962// integers (and undefs).
963// Do not permit build vector implicit truncation.
964static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
966 return !(Const->isOpaque() && NoOpaques);
967 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
968 return false;
969 unsigned BitWidth = N.getScalarValueSizeInBits();
970 for (const SDValue &Op : N->op_values()) {
971 if (Op.isUndef())
972 continue;
974 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
975 (Const->isOpaque() && NoOpaques))
976 return false;
977 }
978 return true;
979}
980
981// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
982// undef's.
983static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
984 if (V.getOpcode() != ISD::BUILD_VECTOR)
985 return false;
988}
989
990// Determine if this an indexed load with an opaque target constant index.
991static bool canSplitIdx(LoadSDNode *LD) {
992 return MaySplitLoadIndex &&
993 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
994 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
995}
996
997bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
998 const SDLoc &DL,
999 SDValue N0,
1000 SDValue N1) {
1001 // Currently this only tries to ensure we don't undo the GEP splits done by
1002 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1003 // we check if the following transformation would be problematic:
1004 // (load/store (add, (add, x, offset1), offset2)) ->
1005 // (load/store (add, x, offset1+offset2)).
1006
1007 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1008 return false;
1009
1010 if (N0.hasOneUse())
1011 return false;
1012
1013 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1015 if (!C1 || !C2)
1016 return false;
1017
1018 const APInt &C1APIntVal = C1->getAPIntValue();
1019 const APInt &C2APIntVal = C2->getAPIntValue();
1020 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1021 return false;
1022
1024 if (CombinedValueIntVal.getBitWidth() > 64)
1025 return false;
1026 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1027
1028 for (SDNode *Node : N0->uses()) {
1030 if (LoadStore) {
1031 // Is x[offset2] already not a legal addressing mode? If so then
1032 // reassociating the constants breaks nothing (we test offset2 because
1033 // that's the one we hope to fold into the load or store).
1035 AM.HasBaseReg = true;
1036 AM.BaseOffs = C2APIntVal.getSExtValue();
1037 EVT VT = LoadStore->getMemoryVT();
1038 unsigned AS = LoadStore->getAddressSpace();
1039 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1040 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1041 continue;
1042
1043 // Would x[offset1+offset2] still be a legal addressing mode?
1045 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1046 return true;
1047 }
1048 }
1049
1050 return false;
1051}
1052
1053// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1054// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1055SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1056 SDValue N0, SDValue N1) {
1057 EVT VT = N0.getValueType();
1058
1059 if (N0.getOpcode() != Opc)
1060 return SDValue();
1061
1062 SDValue N00 = N0.getOperand(0);
1063 SDValue N01 = N0.getOperand(1);
1064
1067 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1068 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1069 return DAG.getNode(Opc, DL, VT, N00, OpNode);
1070 return SDValue();
1071 }
1072 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1073 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1074 // iff (op x, c1) has one use
1075 if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
1076 return DAG.getNode(Opc, DL, VT, OpNode, N01);
1077 return SDValue();
1078 }
1079 }
1080 return SDValue();
1081}
1082
1083// Try to reassociate commutative binops.
1084SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1085 SDValue N1, SDNodeFlags Flags) {
1086 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1087
1088 // Floating-point reassociation is not allowed without loose FP math.
1089 if (N0.getValueType().isFloatingPoint() ||
1090 N1.getValueType().isFloatingPoint())
1091 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1092 return SDValue();
1093
1095 return Combined;
1097 return Combined;
1098 return SDValue();
1099}
1100
1101SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1102 bool AddTo) {
1103 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1104 ++NodesCombined;
1105 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1106 To[0].getNode()->dump(&DAG);
1107 dbgs() << " and " << NumTo - 1 << " other values\n");
1108 for (unsigned i = 0, e = NumTo; i != e; ++i)
1109 assert((!To[i].getNode() ||
1110 N->getValueType(i) == To[i].getValueType()) &&
1111 "Cannot combine value to value of different type!");
1112
1113 WorklistRemover DeadNodes(*this);
1114 DAG.ReplaceAllUsesWith(N, To);
1115 if (AddTo) {
1116 // Push the new nodes and any users onto the worklist
1117 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1118 if (To[i].getNode()) {
1119 AddToWorklist(To[i].getNode());
1120 AddUsersToWorklist(To[i].getNode());
1121 }
1122 }
1123 }
1124
1125 // Finally, if the node is now dead, remove it from the graph. The node
1126 // may not be dead if the replacement process recursively simplified to
1127 // something else needing this node.
1128 if (N->use_empty())
1130 return SDValue(N, 0);
1131}
1132
1133void DAGCombiner::
1134CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1135 // Replace the old value with the new one.
1136 ++NodesCombined;
1137 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1138 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1139 dbgs() << '\n');
1140
1141 // Replace all uses. If any nodes become isomorphic to other nodes and
1142 // are deleted, make sure to remove them from our worklist.
1143 WorklistRemover DeadNodes(*this);
1144 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1145
1146 // Push the new node and any (possibly new) users onto the worklist.
1147 AddToWorklistWithUsers(TLO.New.getNode());
1148
1149 // Finally, if the node is now dead, remove it from the graph. The node
1150 // may not be dead if the replacement process recursively simplified to
1151 // something else needing this node.
1152 if (TLO.Old.getNode()->use_empty())
1153 deleteAndRecombine(TLO.Old.getNode());
1154}
1155
1156/// Check the specified integer node value to see if it can be simplified or if
1157/// things it uses can be simplified by bit propagation. If so, return true.
1158bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1159 const APInt &DemandedElts,
1160 bool AssumeSingleUse) {
1161 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1162 KnownBits Known;
1163 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1165 return false;
1166
1167 // Revisit the node.
1168 AddToWorklist(Op.getNode());
1169
1170 CommitTargetLoweringOpt(TLO);
1171 return true;
1172}
1173
1174/// Check the specified vector node value to see if it can be simplified or
1175/// if things it uses can be simplified as it only uses some of the elements.
1176/// If so, return true.
1177bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1178 const APInt &DemandedElts,
1179 bool AssumeSingleUse) {
1180 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1183 TLO, 0, AssumeSingleUse))
1184 return false;
1185
1186 // Revisit the node.
1187 AddToWorklist(Op.getNode());
1188
1189 CommitTargetLoweringOpt(TLO);
1190 return true;
1191}
1192
1193void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1194 SDLoc DL(Load);
1195 EVT VT = Load->getValueType(0);
1196 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1197
1198 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1199 Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1200 WorklistRemover DeadNodes(*this);
1201 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1203 deleteAndRecombine(Load);
1204 AddToWorklist(Trunc.getNode());
1205}
1206
1207SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1208 Replace = false;
1209 SDLoc DL(Op);
1210 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1212 EVT MemVT = LD->getMemoryVT();
1214 : LD->getExtensionType();
1215 Replace = true;
1216 return DAG.getExtLoad(ExtType, DL, PVT,
1217 LD->getChain(), LD->getBasePtr(),
1218 MemVT, LD->getMemOperand());
1219 }
1220
1221 unsigned Opc = Op.getOpcode();
1222 switch (Opc) {
1223 default: break;
1224 case ISD::AssertSext:
1225 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1226 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1227 break;
1228 case ISD::AssertZext:
1229 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1230 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1231 break;
1232 case ISD::Constant: {
1233 unsigned ExtOpc =
1234 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1235 return DAG.getNode(ExtOpc, DL, PVT, Op);
1236 }
1237 }
1238
1240 return SDValue();
1241 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1242}
1243
1244SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1246 return SDValue();
1247 EVT OldVT = Op.getValueType();
1248 SDLoc DL(Op);
1249 bool Replace = false;
1251 if (!NewOp.getNode())
1252 return SDValue();
1253 AddToWorklist(NewOp.getNode());
1254
1255 if (Replace)
1256 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1257 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1258 DAG.getValueType(OldVT));
1259}
1260
1261SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1262 EVT OldVT = Op.getValueType();
1263 SDLoc DL(Op);
1264 bool Replace = false;
1266 if (!NewOp.getNode())
1267 return SDValue();
1268 AddToWorklist(NewOp.getNode());
1269
1270 if (Replace)
1271 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1272 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1273}
1274
1275/// Promote the specified integer binary operation if the target indicates it is
1276/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1277/// i32 since i16 instructions are longer.
1278SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1279 if (!LegalOperations)
1280 return SDValue();
1281
1282 EVT VT = Op.getValueType();
1283 if (VT.isVector() || !VT.isInteger())
1284 return SDValue();
1285
1286 // If operation type is 'undesirable', e.g. i16 on x86, consider
1287 // promoting it.
1288 unsigned Opc = Op.getOpcode();
1289 if (TLI.isTypeDesirableForOp(Opc, VT))
1290 return SDValue();
1291
1292 EVT PVT = VT;
1293 // Consult target whether it is a good idea to promote this operation and
1294 // what's the right type to promote it to.
1295 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1296 assert(PVT != VT && "Don't know what type to promote to!");
1297
1298 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1299
1300 bool Replace0 = false;
1301 SDValue N0 = Op.getOperand(0);
1303
1304 bool Replace1 = false;
1305 SDValue N1 = Op.getOperand(1);
1307 SDLoc DL(Op);
1308
1309 SDValue RV =
1310 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1311
1312 // We are always replacing N0/N1's use in N and only need additional
1313 // replacements if there are additional uses.
1314 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1315 // (SDValue) here because the node may reference multiple values
1316 // (for example, the chain value of a load node).
1317 Replace0 &= !N0->hasOneUse();
1318 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1319
1320 // Combine Op here so it is preserved past replacements.
1321 CombineTo(Op.getNode(), RV);
1322
1323 // If operands have a use ordering, make sure we deal with
1324 // predecessor first.
1325 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1326 std::swap(N0, N1);
1327 std::swap(NN0, NN1);
1328 }
1329
1330 if (Replace0) {
1331 AddToWorklist(NN0.getNode());
1332 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1333 }
1334 if (Replace1) {
1335 AddToWorklist(NN1.getNode());
1336 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1337 }
1338 return Op;
1339 }
1340 return SDValue();
1341}
1342
1343/// Promote the specified integer shift operation if the target indicates it is
1344/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1345/// i32 since i16 instructions are longer.
1346SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1347 if (!LegalOperations)
1348 return SDValue();
1349
1350 EVT VT = Op.getValueType();
1351 if (VT.isVector() || !VT.isInteger())
1352 return SDValue();
1353
1354 // If operation type is 'undesirable', e.g. i16 on x86, consider
1355 // promoting it.
1356 unsigned Opc = Op.getOpcode();
1357 if (TLI.isTypeDesirableForOp(Opc, VT))
1358 return SDValue();
1359
1360 EVT PVT = VT;
1361 // Consult target whether it is a good idea to promote this operation and
1362 // what's the right type to promote it to.
1363 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1364 assert(PVT != VT && "Don't know what type to promote to!");
1365
1366 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1367
1368 bool Replace = false;
1369 SDValue N0 = Op.getOperand(0);
1370 SDValue N1 = Op.getOperand(1);
1371 if (Opc == ISD::SRA)
1372 N0 = SExtPromoteOperand(N0, PVT);
1373 else if (Opc == ISD::SRL)
1374 N0 = ZExtPromoteOperand(N0, PVT);
1375 else
1376 N0 = PromoteOperand(N0, PVT, Replace);
1377
1378 if (!N0.getNode())
1379 return SDValue();
1380
1381 SDLoc DL(Op);
1382 SDValue RV =
1383 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1384
1385 if (Replace)
1386 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1387
1388 // Deal with Op being deleted.
1389 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1390 return RV;
1391 }
1392 return SDValue();
1393}
1394
1395SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1396 if (!LegalOperations)
1397 return SDValue();
1398
1399 EVT VT = Op.getValueType();
1400 if (VT.isVector() || !VT.isInteger())
1401 return SDValue();
1402
1403 // If operation type is 'undesirable', e.g. i16 on x86, consider
1404 // promoting it.
1405 unsigned Opc = Op.getOpcode();
1406 if (TLI.isTypeDesirableForOp(Opc, VT))
1407 return SDValue();
1408
1409 EVT PVT = VT;
1410 // Consult target whether it is a good idea to promote this operation and
1411 // what's the right type to promote it to.
1412 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1413 assert(PVT != VT && "Don't know what type to promote to!");
1414 // fold (aext (aext x)) -> (aext x)
1415 // fold (aext (zext x)) -> (zext x)
1416 // fold (aext (sext x)) -> (sext x)
1417 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1418 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1419 }
1420 return SDValue();
1421}
1422
1423bool DAGCombiner::PromoteLoad(SDValue Op) {
1424 if (!LegalOperations)
1425 return false;
1426
1427 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1428 return false;
1429
1430 EVT VT = Op.getValueType();
1431 if (VT.isVector() || !VT.isInteger())
1432 return false;
1433
1434 // If operation type is 'undesirable', e.g. i16 on x86, consider
1435 // promoting it.
1436 unsigned Opc = Op.getOpcode();
1437 if (TLI.isTypeDesirableForOp(Opc, VT))
1438 return false;
1439
1440 EVT PVT = VT;
1441 // Consult target whether it is a good idea to promote this operation and
1442 // what's the right type to promote it to.
1443 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1444 assert(PVT != VT && "Don't know what type to promote to!");
1445
1446 SDLoc DL(Op);
1447 SDNode *N = Op.getNode();
1449 EVT MemVT = LD->getMemoryVT();
1451 : LD->getExtensionType();
1452 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1453 LD->getChain(), LD->getBasePtr(),
1454 MemVT, LD->getMemOperand());
1456
1457 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1458 Result.getNode()->dump(&DAG); dbgs() << '\n');
1459 WorklistRemover DeadNodes(*this);
1460 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1461 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1463 AddToWorklist(Result.getNode());
1464 return true;
1465 }
1466 return false;
1467}
1468
1469/// Recursively delete a node which has no uses and any operands for
1470/// which it is the only use.
1471///
1472/// Note that this both deletes the nodes and removes them from the worklist.
1473/// It also adds any nodes who have had a user deleted to the worklist as they
1474/// may now have only one use and subject to other combines.
1475bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1476 if (!N->use_empty())
1477 return false;
1478
1480 Nodes.insert(N);
1481 do {
1482 N = Nodes.pop_back_val();
1483 if (!N)
1484 continue;
1485
1486 if (N->use_empty()) {
1487 for (const SDValue &ChildN : N->op_values())
1488 Nodes.insert(ChildN.getNode());
1489
1491 DAG.DeleteNode(N);
1492 } else {
1493 AddToWorklist(N);
1494 }
1495 } while (!Nodes.empty());
1496 return true;
1497}
1498
1499//===----------------------------------------------------------------------===//
1500// Main DAG Combiner implementation
1501//===----------------------------------------------------------------------===//
1502
1503void DAGCombiner::Run(CombineLevel AtLevel) {
1504 // set the instance variables, so that the various visit routines may use it.
1505 Level = AtLevel;
1506 LegalDAG = Level >= AfterLegalizeDAG;
1507 LegalOperations = Level >= AfterLegalizeVectorOps;
1508 LegalTypes = Level >= AfterLegalizeTypes;
1509
1510 WorklistInserter AddNodes(*this);
1511
1512 // Add all the dag nodes to the worklist.
1513 for (SDNode &Node : DAG.allnodes())
1514 AddToWorklist(&Node);
1515
1516 // Create a dummy node (which is not added to allnodes), that adds a reference
1517 // to the root node, preventing it from being deleted, and tracking any
1518 // changes of the root.
1519 HandleSDNode Dummy(DAG.getRoot());
1520
1521 // While we have a valid worklist entry node, try to combine it.
1522 while (SDNode *N = getNextWorklistEntry()) {
1523 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1524 // N is deleted from the DAG, since they too may now be dead or may have a
1525 // reduced number of uses, allowing other xforms.
1526 if (recursivelyDeleteUnusedNodes(N))
1527 continue;
1528
1529 WorklistRemover DeadNodes(*this);
1530
1531 // If this combine is running after legalizing the DAG, re-legalize any
1532 // nodes pulled off the worklist.
1533 if (LegalDAG) {
1534 SmallSetVector<SDNode *, 16> UpdatedNodes;
1535 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1536
1537 for (SDNode *LN : UpdatedNodes)
1539
1540 if (!NIsValid)
1541 continue;
1542 }
1543
1544 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1545
1546 // Add any operands of the new node which have not yet been combined to the
1547 // worklist as well. Because the worklist uniques things already, this
1548 // won't repeatedly process the same operand.
1549 CombinedNodes.insert(N);
1550 for (const SDValue &ChildN : N->op_values())
1551 if (!CombinedNodes.count(ChildN.getNode()))
1552 AddToWorklist(ChildN.getNode());
1553
1554 SDValue RV = combine(N);
1555
1556 if (!RV.getNode())
1557 continue;
1558
1559 ++NodesCombined;
1560
1561 // If we get back the same node we passed in, rather than a new node or
1562 // zero, we know that the node must have defined multiple values and
1563 // CombineTo was used. Since CombineTo takes care of the worklist
1564 // mechanics for us, we have no work to do in this case.
1565 if (RV.getNode() == N)
1566 continue;
1567
1568 assert(N->getOpcode() != ISD::DELETED_NODE &&
1569 RV.getOpcode() != ISD::DELETED_NODE &&
1570 "Node was deleted but visit returned new node!");
1571
1572 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1573
1574 if (N->getNumValues() == RV.getNode()->getNumValues())
1575 DAG.ReplaceAllUsesWith(N, RV.getNode());
1576 else {
1577 assert(N->getValueType(0) == RV.getValueType() &&
1578 N->getNumValues() == 1 && "Type mismatch");
1579 DAG.ReplaceAllUsesWith(N, &RV);
1580 }
1581
1582 // Push the new node and any users onto the worklist. Omit this if the
1583 // new node is the EntryToken (e.g. if a store managed to get optimized
1584 // out), because re-visiting the EntryToken and its users will not uncover
1585 // any additional opportunities, but there may be a large number of such
1586 // users, potentially causing compile time explosion.
1587 if (RV.getOpcode() != ISD::EntryToken) {
1588 AddToWorklist(RV.getNode());
1589 AddUsersToWorklist(RV.getNode());
1590 }
1591
1592 // Finally, if the node is now dead, remove it from the graph. The node
1593 // may not be dead if the replacement process recursively simplified to
1594 // something else needing this node. This will also take care of adding any
1595 // operands which have lost a user to the worklist.
1596 recursivelyDeleteUnusedNodes(N);
1597 }
1598
1599 // If the root changed (e.g. it was a dead load, update the root).
1600 DAG.setRoot(Dummy.getValue());
1601 DAG.RemoveDeadNodes();
1602}
1603
1604SDValue DAGCombiner::visit(SDNode *N) {
1605 switch (N->getOpcode()) {
1606 default: break;
1607 case ISD::TokenFactor: return visitTokenFactor(N);
1609 case ISD::ADD: return visitADD(N);
1610 case ISD::SUB: return visitSUB(N);
1611 case ISD::SADDSAT:
1612 case ISD::UADDSAT: return visitADDSAT(N);
1613 case ISD::SSUBSAT:
1614 case ISD::USUBSAT: return visitSUBSAT(N);
1615 case ISD::ADDC: return visitADDC(N);
1616 case ISD::SADDO:
1617 case ISD::UADDO: return visitADDO(N);
1618 case ISD::SUBC: return visitSUBC(N);
1619 case ISD::SSUBO:
1620 case ISD::USUBO: return visitSUBO(N);
1621 case ISD::ADDE: return visitADDE(N);
1622 case ISD::ADDCARRY: return visitADDCARRY(N);
1623 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1624 case ISD::SUBE: return visitSUBE(N);
1625 case ISD::SUBCARRY: return visitSUBCARRY(N);
1626 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1627 case ISD::SMULFIX:
1628 case ISD::SMULFIXSAT:
1629 case ISD::UMULFIX:
1630 case ISD::UMULFIXSAT: return visitMULFIX(N);
1631 case ISD::MUL: return visitMUL(N);
1632 case ISD::SDIV: return visitSDIV(N);
1633 case ISD::UDIV: return visitUDIV(N);
1634 case ISD::SREM:
1635 case ISD::UREM: return visitREM(N);
1636 case ISD::MULHU: return visitMULHU(N);
1637 case ISD::MULHS: return visitMULHS(N);
1638 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1639 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1640 case ISD::SMULO:
1641 case ISD::UMULO: return visitMULO(N);
1642 case ISD::SMIN:
1643 case ISD::SMAX:
1644 case ISD::UMIN:
1645 case ISD::UMAX: return visitIMINMAX(N);
1646 case ISD::AND: return visitAND(N);
1647 case ISD::OR: return visitOR(N);
1648 case ISD::XOR: return visitXOR(N);
1649 case ISD::SHL: return visitSHL(N);
1650 case ISD::SRA: return visitSRA(N);
1651 case ISD::SRL: return visitSRL(N);
1652 case ISD::ROTR:
1653 case ISD::ROTL: return visitRotate(N);
1654 case ISD::FSHL:
1655 case ISD::FSHR: return visitFunnelShift(N);
1656 case ISD::SSHLSAT:
1657 case ISD::USHLSAT: return visitSHLSAT(N);
1658 case ISD::ABS: return visitABS(N);
1659 case ISD::BSWAP: return visitBSWAP(N);
1660 case ISD::BITREVERSE: return visitBITREVERSE(N);
1661 case ISD::CTLZ: return visitCTLZ(N);
1663 case ISD::CTTZ: return visitCTTZ(N);
1665 case ISD::CTPOP: return visitCTPOP(N);
1666 case ISD::SELECT: return visitSELECT(N);
1667 case ISD::VSELECT: return visitVSELECT(N);
1668 case ISD::SELECT_CC: return visitSELECT_CC(N);
1669 case ISD::SETCC: return visitSETCC(N);
1670 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1671 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1672 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1673 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1674 case ISD::AssertSext:
1675 case ISD::AssertZext: return visitAssertExt(N);
1676 case ISD::AssertAlign: return visitAssertAlign(N);
1680 case ISD::TRUNCATE: return visitTRUNCATE(N);
1681 case ISD::BITCAST: return visitBITCAST(N);
1682 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1683 case ISD::FADD: return visitFADD(N);
1684 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1685 case ISD::FSUB: return visitFSUB(N);
1686 case ISD::FMUL: return visitFMUL(N);
1687 case ISD::FMA: return visitFMA(N);
1688 case ISD::FDIV: return visitFDIV(N);
1689 case ISD::FREM: return visitFREM(N);
1690 case ISD::FSQRT: return visitFSQRT(N);
1691 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1692 case ISD::FPOW: return visitFPOW(N);
1693 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1694 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1695 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1696 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1697 case ISD::FP_ROUND: return visitFP_ROUND(N);
1698 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1699 case ISD::FNEG: return visitFNEG(N);
1700 case ISD::FABS: return visitFABS(N);
1701 case ISD::FFLOOR: return visitFFLOOR(N);
1702 case ISD::FMINNUM:
1703 case ISD::FMAXNUM:
1704 case ISD::FMINIMUM:
1705 case ISD::FMAXIMUM: return visitFMinMax(N);
1706 case ISD::FCEIL: return visitFCEIL(N);
1707 case ISD::FTRUNC: return visitFTRUNC(N);
1708 case ISD::BRCOND: return visitBRCOND(N);
1709 case ISD::BR_CC: return visitBR_CC(N);
1710 case ISD::LOAD: return visitLOAD(N);
1711 case ISD::STORE: return visitSTORE(N);
1720 case ISD::MGATHER: return visitMGATHER(N);
1721 case ISD::MLOAD: return visitMLOAD(N);
1722 case ISD::MSCATTER: return visitMSCATTER(N);
1723 case ISD::MSTORE: return visitMSTORE(N);
1725 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1726 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1727 case ISD::FREEZE: return visitFREEZE(N);
1730 case ISD::VECREDUCE_ADD:
1731 case ISD::VECREDUCE_MUL:
1732 case ISD::VECREDUCE_AND:
1733 case ISD::VECREDUCE_OR:
1734 case ISD::VECREDUCE_XOR:
1740 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1741#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1742#include "llvm/IR/VPIntrinsics.def"
1743 return visitVPOp(N);
1744 }
1745 return SDValue();
1746}
1747
1748SDValue DAGCombiner::combine(SDNode *N) {
1749 SDValue RV;
1750 if (!DisableGenericCombines)
1751 RV = visit(N);
1752
1753 // If nothing happened, try a target-specific DAG combine.
1754 if (!RV.getNode()) {
1755 assert(N->getOpcode() != ISD::DELETED_NODE &&
1756 "Node was deleted but visit returned NULL!");
1757
1758 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1759 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1760
1761 // Expose the DAG combiner to the target combiner impls.
1763 DagCombineInfo(DAG, Level, false, this);
1764
1766 }
1767 }
1768
1769 // If nothing happened still, try promoting the operation.
1770 if (!RV.getNode()) {
1771 switch (N->getOpcode()) {
1772 default: break;
1773 case ISD::ADD:
1774 case ISD::SUB:
1775 case ISD::MUL:
1776 case ISD::AND:
1777 case ISD::OR:
1778 case ISD::XOR:
1779 RV = PromoteIntBinOp(SDValue(N, 0));
1780 break;
1781 case ISD::SHL:
1782 case ISD::SRA:
1783 case ISD::SRL:
1785 break;
1786 case ISD::SIGN_EXTEND:
1787 case ISD::ZERO_EXTEND:
1788 case ISD::ANY_EXTEND:
1789 RV = PromoteExtend(SDValue(N, 0));
1790 break;
1791 case ISD::LOAD:
1792 if (PromoteLoad(SDValue(N, 0)))
1793 RV = SDValue(N, 0);
1794 break;
1795 }
1796 }
1797
1798 // If N is a commutative binary node, try to eliminate it if the commuted
1799 // version is already present in the DAG.
1800 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1801 N->getNumValues() == 1) {
1802 SDValue N0 = N->getOperand(0);
1803 SDValue N1 = N->getOperand(1);
1804
1805 // Constant operands are canonicalized to RHS.
1806 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1807 SDValue Ops[] = {N1, N0};
1808 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1809 N->getFlags());
1810 if (CSENode)
1811 return SDValue(CSENode, 0);
1812 }
1813 }
1814
1815 return RV;
1816}
1817
1818/// Given a node, return its input chain if it has one, otherwise return a null
1819/// sd operand.
1821 if (unsigned NumOps = N->getNumOperands()) {
1822 if (N->getOperand(0).getValueType() == MVT::Other)
1823 return N->getOperand(0);
1824 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1825 return N->getOperand(NumOps-1);
1826 for (unsigned i = 1; i < NumOps-1; ++i)
1827 if (N->getOperand(i).getValueType() == MVT::Other)
1828 return N->getOperand(i);
1829 }
1830 return SDValue();
1831}
1832
1833SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1834 // If N has two operands, where one has an input chain equal to the other,
1835 // the 'other' chain is redundant.
1836 if (N->getNumOperands() == 2) {
1837 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1838 return N->getOperand(0);
1839 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1840 return N->getOperand(1);
1841 }
1842
1843 // Don't simplify token factors if optnone.
1844 if (OptLevel == CodeGenOpt::None)
1845 return SDValue();
1846
1847 // Don't simplify the token factor if the node itself has too many operands.
1848 if (N->getNumOperands() > TokenFactorInlineLimit)
1849 return SDValue();
1850
1851 // If the sole user is a token factor, we should make sure we have a
1852 // chance to merge them together. This prevents TF chains from inhibiting
1853 // optimizations.
1854 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1855 AddToWorklist(*(N->use_begin()));
1856
1857 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1858 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1860 bool Changed = false; // If we should replace this token factor.
1861
1862 // Start out with this token factor.
1863 TFs.push_back(N);
1864
1865 // Iterate through token factors. The TFs grows when new token factors are
1866 // encountered.
1867 for (unsigned i = 0; i < TFs.size(); ++i) {
1868 // Limit number of nodes to inline, to avoid quadratic compile times.
1869 // We have to add the outstanding Token Factors to Ops, otherwise we might
1870 // drop Ops from the resulting Token Factors.
1871 if (Ops.size() > TokenFactorInlineLimit) {
1872 for (unsigned j = i; j < TFs.size(); j++)
1873 Ops.emplace_back(TFs[j], 0);
1874 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1875 // combiner worklist later.
1876 TFs.resize(i);
1877 break;
1878 }
1879
1880 SDNode *TF = TFs[i];
1881 // Check each of the operands.
1882 for (const SDValue &Op : TF->op_values()) {
1883 switch (Op.getOpcode()) {
1884 case ISD::EntryToken:
1885 // Entry tokens don't need to be added to the list. They are
1886 // redundant.
1887 Changed = true;
1888 break;
1889
1890 case ISD::TokenFactor:
1891 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1892 // Queue up for processing.
1893 TFs.push_back(Op.getNode());
1894 Changed = true;
1895 break;
1896 }
1898
1899 default:
1900 // Only add if it isn't already in the list.
1901 if (SeenOps.insert(Op.getNode()).second)
1902 Ops.push_back(Op);
1903 else
1904 Changed = true;
1905 break;
1906 }
1907 }
1908 }
1909
1910 // Re-visit inlined Token Factors, to clean them up in case they have been
1911 // removed. Skip the first Token Factor, as this is the current node.
1912 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1913 AddToWorklist(TFs[i]);
1914
1915 // Remove Nodes that are chained to another node in the list. Do so
1916 // by walking up chains breath-first stopping when we've seen
1917 // another operand. In general we must climb to the EntryNode, but we can exit
1918 // early if we find all remaining work is associated with just one operand as
1919 // no further pruning is possible.
1920
1921 // List of nodes to search through and original Ops from which they originate.
1923 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1925 bool DidPruneOps = false;
1926
1927 unsigned NumLeftToConsider = 0;
1928 for (const SDValue &Op : Ops) {
1929 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1930 OpWorkCount.push_back(1);
1931 }
1932
1933 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1934 // If this is an Op, we can remove the op from the list. Remark any
1935 // search associated with it as from the current OpNumber.
1936 if (SeenOps.contains(Op)) {
1937 Changed = true;
1938 DidPruneOps = true;
1939 unsigned OrigOpNumber = 0;
1940 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1941 OrigOpNumber++;
1942 assert((OrigOpNumber != Ops.size()) &&
1943 "expected to find TokenFactor Operand");
1944 // Re-mark worklist from OrigOpNumber to OpNumber
1945 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1946 if (Worklist[i].second == OrigOpNumber) {
1947 Worklist[i].second = OpNumber;
1948 }
1949 }
1953 }
1954 // Add if it's a new chain
1955 if (SeenChains.insert(Op).second) {
1957 Worklist.push_back(std::make_pair(Op, OpNumber));
1958 }
1959 };
1960
1961 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1962 // We need at least be consider at least 2 Ops to prune.
1963 if (NumLeftToConsider <= 1)
1964 break;
1965 auto CurNode = Worklist[i].first;
1966 auto CurOpNumber = Worklist[i].second;
1968 "Node should not appear in worklist");
1969 switch (CurNode->getOpcode()) {
1970 case ISD::EntryToken:
1971 // Hitting EntryToken is the only way for the search to terminate without
1972 // hitting
1973 // another operand's search. Prevent us from marking this operand
1974 // considered.
1976 break;
1977 case ISD::TokenFactor:
1978 for (const SDValue &Op : CurNode->op_values())
1979 AddToWorklist(i, Op.getNode(), CurOpNumber);
1980 break;
1982 case ISD::LIFETIME_END:
1983 case ISD::CopyFromReg:
1984 case ISD::CopyToReg:
1985 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1986 break;
1987 default:
1988 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1989 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1990 break;
1991 }
1993 if (OpWorkCount[CurOpNumber] == 0)
1995 }
1996
1997 // If we've changed things around then replace token factor.
1998 if (Changed) {
2000 if (Ops.empty()) {
2001 // The entry token is the only possible outcome.
2002 Result = DAG.getEntryNode();
2003 } else {
2004 if (DidPruneOps) {
2006 //
2007 for (const SDValue &Op : Ops) {
2008 if (SeenChains.count(Op.getNode()) == 0)
2009 PrunedOps.push_back(Op);
2010 }
2012 } else {
2013 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2014 }
2015 }
2016 return Result;
2017 }
2018 return SDValue();
2019}
2020
2021/// MERGE_VALUES can always be eliminated.
2022SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2023 WorklistRemover DeadNodes(*this);
2024 // Replacing results may cause a different MERGE_VALUES to suddenly
2025 // be CSE'd with N, and carry its uses with it. Iterate until no
2026 // uses remain, to ensure that the node can be safely deleted.
2027 // First add the users of this node to the work list so that they
2028 // can be tried again once they have new operands.
2030 do {
2031 // Do as a single replacement to avoid rewalking use lists.
2033 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2034 Ops.push_back(N->getOperand(i));
2035 DAG.ReplaceAllUsesWith(N, Ops.data());
2036 } while (!N->use_empty());
2038 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2039}
2040
2041/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2042/// ConstantSDNode pointer else nullptr.
2045 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2046}
2047
2048/// Return true if 'Use' is a load or a store that uses N as its base pointer
2049/// and that N may be folded in the load / store addressing mode.
2051 const TargetLowering &TLI) {
2052 EVT VT;
2053 unsigned AS;
2054
2055 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2056 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2057 return false;
2058 VT = LD->getMemoryVT();
2059 AS = LD->getAddressSpace();
2060 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2061 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2062 return false;
2063 VT = ST->getMemoryVT();
2064 AS = ST->getAddressSpace();
2066 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2067 return false;
2068 VT = LD->getMemoryVT();
2069 AS = LD->getAddressSpace();
2071 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2072 return false;
2073 VT = ST->getMemoryVT();
2074 AS = ST->getAddressSpace();
2075 } else
2076 return false;
2077
2079 if (N->getOpcode() == ISD::ADD) {
2080 AM.HasBaseReg = true;
2082 if (Offset)
2083 // [reg +/- imm]
2084 AM.BaseOffs = Offset->getSExtValue();
2085 else
2086 // [reg +/- reg]
2087 AM.Scale = 1;
2088 } else if (N->getOpcode() == ISD::SUB) {
2089 AM.HasBaseReg = true;
2091 if (Offset)
2092 // [reg +/- imm]
2093 AM.BaseOffs = -Offset->getSExtValue();
2094 else
2095 // [reg +/- reg]
2096 AM.Scale = 1;
2097 } else
2098 return false;
2099
2100 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2101 VT.getTypeForEVT(*DAG.getContext()), AS);
2102}
2103
2104/// This inverts a canonicalization in IR that replaces a variable select arm
2105/// with an identity constant. Codegen improves if we re-use the variable
2106/// operand rather than load a constant. This can also be converted into a
2107/// masked vector operation if the target supports it.
// NOTE(review): the doc-site extraction dropped source lines in this function
// (the embedded numbering skips 2108 -- the signature line -- as well as 2114
// and 2131, which held the commute guard and the constant-splat match). The
// comments below describe only what the visible code shows.
2109                                           bool ShouldCommuteOperands) {
2110  // Match a select as operand 1. The identity constant that we are looking for
2111  // is only valid as operand 1 of a non-commutative binop.
2112  SDValue N0 = N->getOperand(0);
2113  SDValue N1 = N->getOperand(1);
// Swap so the select is matched in the N1 position (the guard that makes this
// conditional on ShouldCommuteOperands sat on missing line 2114 -- TODO confirm).
2115    std::swap(N0, N1);
2116
2117  // TODO: Should this apply to scalar select too?
2118  if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT)
2119    return SDValue();
2120
2121  unsigned Opcode = N->getOpcode();
2122  EVT VT = N->getValueType(0);
2123  SDValue Cond = N1.getOperand(0);
2124  SDValue TVal = N1.getOperand(1);
2125  SDValue FVal = N1.getOperand(2);
2126
2127  // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
2128  // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
2129  // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
// Predicate: is V the identity constant for Opcode as the *second* operand?
// (FADD: -0.0, FSUB: +0.0, FMUL/FDIV: 1.0.) The match that binds C from V sat
// on missing line 2131.
2130  auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
2132      switch (Opcode) {
2133      case ISD::FADD: // X + -0.0 --> X
2134        return C->isZero() && C->isNegative();
2135      case ISD::FSUB: // X - 0.0 --> X
2136        return C->isZero() && !C->isNegative();
2137      case ISD::FMUL: // X * 1.0 --> X
2138      case ISD::FDIV: // X / 1.0 --> X
2139        return C->isExactlyValue(1.0);
2140      }
2141    }
2142    return false;
2143  };
2144
2145  // This transform increases uses of N0, so freeze it to be safe.
2146  // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2147  if (isIdentityConstantForOpcode(Opcode, TVal)) {
2148    SDValue F0 = DAG.getFreeze(N0);
2149    SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2150    return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2151  }
2152  // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2153  if (isIdentityConstantForOpcode(Opcode, FVal)) {
2154    SDValue F0 = DAG.getFreeze(N0);
2155    SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2156    return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2157  }
2158
2159  return SDValue();
2160}
2161
// Pull a binary operator into the arms of a select-of-constants so the binop
// itself can be constant-folded away (e.g. add (select C, CT, CF), CBO -->
// select C, CT+CBO, CF+CBO).
// NOTE(review): the extraction dropped several lines here (e.g. 2169, which
// likely guarded the identity-constant folds, and the continuation lines of
// the conditions at 2192/2197/2206/2212/2223/2229). Comments describe only the
// visible code.
2162SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2163  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2164         "Unexpected binary operator");
2165
2166  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2167  auto BinOpcode = BO->getOpcode();
2168  EVT VT = BO->getValueType(0);
2170    if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2171      return Sel;
2172
// For commutative binops, also try with the operands commuted.
2173    if (TLI.isCommutativeBinOp(BO->getOpcode()))
2174      if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2175        return Sel;
2176  }
2177
2178  // Don't do this unless the old select is going away. We want to eliminate the
2179  // binary operator, not replace a binop with a select.
2180  // TODO: Handle ISD::SELECT_CC.
2181  unsigned SelOpNo = 0;
2182  SDValue Sel = BO->getOperand(0);
2183  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2184    SelOpNo = 1;
2185    Sel = BO->getOperand(1);
2186  }
2187
2188  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2189    return SDValue();
2190
2191  SDValue CT = Sel.getOperand(1);
2192  if (!isConstantOrConstantVector(CT, true) &&
2194    return SDValue();
2195
2196  SDValue CF = Sel.getOperand(2);
2197  if (!isConstantOrConstantVector(CF, true) &&
2199    return SDValue();
2200
2201  // Bail out if any constants are opaque because we can't constant fold those.
2202  // The exception is "and" and "or" with either 0 or -1 in which case we can
2203  // propagate non constant operands into select. I.e.:
2204  // and (select Cond, 0, -1), X --> select Cond, 0, X
2205  // or X, (select Cond, -1, 0) --> select Cond, -1, X
2206  bool CanFoldNonConst =
2207      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2210
// The non-select operand of the binop; SelOpNo ^ 1 picks the other index.
2211  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2212  if (!CanFoldNonConst &&
2215    return SDValue();
2216
2217  // We have a select-of-constants followed by a binary operator with a
2218  // constant. Eliminate the binop by pulling the constant math into the select.
2219  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2220  SDLoc DL(Sel);
// Operand order matters for non-commutative binops, hence the SelOpNo split.
2221  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2222                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2223  if (!CanFoldNonConst && !NewCT.isUndef() &&
2226    return SDValue();
2227
2228  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2229                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2230  if (!CanFoldNonConst && !NewCF.isUndef() &&
2233    return SDValue();
2234
2235  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2236  SelectOp->setFlags(BO->getFlags());
2237  return SelectOp;
2238}
2239
// Fold add/sub of a constant with a zext'd "low bit is clear" compare into a
// sub/add of the low bit itself with an adjusted constant (see the comment at
// line 2267 below for the exact patterns).
// NOTE(review): the extraction dropped the signature line (2240); this appears
// to be a static helper taking (SDNode *N, SelectionDAG &DAG) -- TODO confirm
// against upstream.
2241  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2242         "Expecting add or sub");
2243
2244  // Match a constant operand and a zext operand for the math instruction:
2245  // add Z, C
2246  // sub C, Z
2247  bool IsAdd = N->getOpcode() == ISD::ADD;
2248  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2249  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2250  auto *CN = dyn_cast<ConstantSDNode>(C);
2251  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2252    return SDValue();
2253
2254  // Match the zext operand as a setcc of a boolean.
2255  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2256      Z.getOperand(0).getValueType() != MVT::i1)
2257    return SDValue();
2258
2259  // Match the compare as: setcc (X & 1), 0, eq.
2260  SDValue SetCC = Z.getOperand(0);
2261  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2262  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2263      SetCC.getOperand(0).getOpcode() != ISD::AND ||
2264      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2265    return SDValue();
2266
2267  // We are adding/subtracting a constant and an inverted low bit. Turn that
2268  // into a subtract/add of the low bit with incremented/decremented constant:
2269  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2270  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2271  EVT VT = C.getValueType();
2272  SDLoc DL(N);
// LowBit is (X & 1) widened/narrowed to the result type.
2273  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2274  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2275                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2276  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2277}
2278
2279/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2280/// a shift and add with a different constant.
// NOTE(review): the extraction dropped the signature line (2281), the constant
// check that began the condition at 2290, and the declaration of ShAmtC at
// 2302. Comments describe only the visible code.
2282  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2283         "Expecting add or sub");
2284
2285  // We need a constant operand for the add/sub, and the other operand is a
2286  // logical shift right: add (srl), C or sub C, (srl).
2287  bool IsAdd = N->getOpcode() == ISD::ADD;
2288  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2289  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2291      ShiftOp.getOpcode() != ISD::SRL)
2292    return SDValue();
2293
2294  // The shift must be of a 'not' value.
2295  SDValue Not = ShiftOp.getOperand(0);
2296  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2297    return SDValue();
2298
2299  // The shift must be moving the sign bit to the least-significant-bit.
2300  EVT VT = ShiftOp.getValueType();
2301  SDValue ShAmt = ShiftOp.getOperand(1);
2303  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2304    return SDValue();
2305
2306  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2307  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2308  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2309  SDLoc DL(N);
// For the add form the shift becomes arithmetic (sra); for sub it stays srl.
2310  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2311  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
// Fold C+1 (add form) or C-1 (sub form); bail if constant folding fails.
2312  if (SDValue NewC =
2313          DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2314                                     {ConstantOp, DAG.getConstant(1, DL, VT)}))
2315    return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2316  return SDValue();
2317}
2318
2319/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2320/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2321/// are no common bits set in the operands).
// NOTE(review): the extraction dropped many condition/declaration lines inside
// this function (embedded numbering skips e.g. 2339-2340, 2345, 2349, 2381,
// 2385, 2397, 2403, 2482, 2490, 2494-2495, 2500, 2539, 2544, 2547). Comments
// below describe only the visible code.
2322SDValue DAGCombiner::visitADDLike(SDNode *N) {
2323  SDValue N0 = N->getOperand(0);
2324  SDValue N1 = N->getOperand(1);
2325  EVT VT = N0.getValueType();
2326  SDLoc DL(N);
2327
2328  // fold (add x, undef) -> undef
2329  if (N0.isUndef())
2330    return N0;
2331  if (N1.isUndef())
2332    return N1;
2333
2334  // fold (add c1, c2) -> c1+c2
2335  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2336    return C;
2337
2338  // canonicalize constant to RHS
// (The constant-on-LHS test sat on missing lines 2339-2340.)
2341    return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2342
2343  // fold vector ops
2344  if (VT.isVector()) {
2346      return FoldedVOp;
2347
2348    // fold (add x, 0) -> x, vector edition
2350      return N0;
2351  }
2352
2353  // fold (add x, 0) -> x
2354  if (isNullConstant(N1))
2355    return N0;
2356
2357  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2358    // fold ((A-c1)+c2) -> (A+(c2-c1))
2359    if (N0.getOpcode() == ISD::SUB &&
2360        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2361      SDValue Sub =
2362          DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
// Both inputs are known non-opaque constants, so folding must succeed.
2363      assert(Sub && "Constant folding failed");
2364      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2365    }
2366
2367    // fold ((c1-A)+c2) -> (c1+c2)-A
2368    if (N0.getOpcode() == ISD::SUB &&
2369        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2370      SDValue Add =
2371          DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2372      assert(Add && "Constant folding failed");
2373      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2374    }
2375
2376    // add (sext i1 X), 1 -> zext (not i1 X)
2377    // We don't transform this pattern:
2378    //   add (zext i1 X), -1 -> sext (not i1 X)
2379    // because most (?) targets generate better code for the zext form.
2380    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2382      SDValue X = N0.getOperand(0);
2383      if ((!LegalOperations ||
2384           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2386          X.getScalarValueSizeInBits() == 1) {
2387        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2388        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2389      }
2390    }
2391
2392    // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2393    // equivalent to (add x, c0).
2394    if (N0.getOpcode() == ISD::OR &&
2395        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2396        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2398                                                 {N1, N0.getOperand(1)}))
2399        return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2400    }
2401  }
2402
2404    return NewSel;
2405
2406  // reassociate add
2407  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2408    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2409      return RADD;
2410
2411    // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2412    // equivalent to (add x, c).
2413    auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2414      if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2415          isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2416          DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2417        return DAG.getNode(ISD::ADD, DL, VT,
2418                           DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2419                           N0.getOperand(1));
2420      }
2421      return SDValue();
2422    };
// Try both operand orders since the OR may be on either side.
2423    if (SDValue Add = ReassociateAddOr(N0, N1))
2424      return Add;
2425    if (SDValue Add = ReassociateAddOr(N1, N0))
2426      return Add;
2427  }
2428  // fold ((0-A) + B) -> B-A
2429  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2430    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2431
2432  // fold (A + (0-B)) -> A-B
2433  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2434    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2435
2436  // fold (A+(B-A)) -> B
2437  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2438    return N1.getOperand(0);
2439
2440  // fold ((B-A)+A) -> B
2441  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2442    return N0.getOperand(0);
2443
2444  // fold ((A-B)+(C-A)) -> (C-B)
2445  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2446      N0.getOperand(0) == N1.getOperand(1))
2447    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2448                       N0.getOperand(1));
2449
2450  // fold ((A-B)+(B-C)) -> (A-C)
2451  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2452      N0.getOperand(1) == N1.getOperand(0))
2453    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2454                       N1.getOperand(1));
2455
2456  // fold (A+(B-(A+C))) to (B-C)
2457  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2458      N0 == N1.getOperand(1).getOperand(0))
2459    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2460                       N1.getOperand(1).getOperand(1));
2461
2462  // fold (A+(B-(C+A))) to (B-C)
2463  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2464      N0 == N1.getOperand(1).getOperand(1))
2465    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2466                       N1.getOperand(1).getOperand(0));
2467
2468  // fold (A+((B-A)+or-C)) to (B+or-C)
2469  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2470      N1.getOperand(0).getOpcode() == ISD::SUB &&
2471      N0 == N1.getOperand(0).getOperand(1))
2472    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2473                       N1.getOperand(1));
2474
2475  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2476  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2477    SDValue N00 = N0.getOperand(0);
2478    SDValue N01 = N0.getOperand(1);
2479    SDValue N10 = N1.getOperand(0);
2480    SDValue N11 = N1.getOperand(1);
2481
// (The constant check on N00/N10 sat on missing line 2482.)
2483      return DAG.getNode(ISD::SUB, DL, VT,
2484                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2485                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2486  }
2487
2488  // fold (add (umax X, C), -C) --> (usubsat X, C)
2489  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
// Predicate over matched splat constants: true when Max == -Op (or both absent,
// i.e. undef lanes). The lambda header sat on missing line 2490.
2491      return (!Max && !Op) ||
2492             (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2493    };
2495                                  /*AllowUndefs*/ true))
2496      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2497                         N0.getOperand(1));
2498  }
2499
2501    return SDValue(N, 0);
2502
2503  if (isOneOrOneSplat(N1)) {
2504    // fold (add (xor a, -1), 1) -> (sub 0, a)
2505    if (isBitwiseNot(N0))
2506      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2507                         N0.getOperand(0));
2508
2509    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2510    if (N0.getOpcode() == ISD::ADD) {
2511      SDValue A, Xor;
2512
// Accept the bitwise-not in either operand position of the inner add.
2513      if (isBitwiseNot(N0.getOperand(0))) {
2514        A = N0.getOperand(1);
2515        Xor = N0.getOperand(0);
2516      } else if (isBitwiseNot(N0.getOperand(1))) {
2517        A = N0.getOperand(0);
2518        Xor = N0.getOperand(1);
2519      }
2520
2521      if (Xor)
2522        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2523    }
2524
2525    // Look for:
2526    //   add (add x, y), 1
2527    // And if the target does not like this form then turn into:
2528    //   sub y, (xor x, -1)
2529    if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2530        N0.getOpcode() == ISD::ADD) {
2531      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2532                                DAG.getAllOnesConstant(DL, VT));
2533      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2534    }
2535  }
2536
2537  // (x - y) + -1  ->  add (xor y, -1), x
2538  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2540    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2541    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2542  }
2543
2545    return Combined;
2546
2548    return Combined;
2549
2550  return SDValue();
2551}
2552
// Main ADD combine entry: delegates the generic add-like folds, then applies
// ADD-specific folds (bool/sign-bit patterns, add->or when no common bits,
// vscale and step_vector constant merging).
// NOTE(review): the extraction dropped line 2559 (the call whose result is
// returned at 2560, presumably visitADDLike -- TODO confirm).
2553SDValue DAGCombiner::visitADD(SDNode *N) {
2554  SDValue N0 = N->getOperand(0);
2555  SDValue N1 = N->getOperand(1);
2556  EVT VT = N0.getValueType();
2557  SDLoc DL(N);
2558
2560    return Combined;
2561
2562  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2563    return V;
2564
2565  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2566    return V;
2567
2568  // fold (a+b) -> (a|b) iff a and b share no bits.
2569  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2570      DAG.haveNoCommonBitsSet(N0, N1))
2571    return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2572
2573  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2574  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2575    const APInt &C0 = N0->getConstantOperandAPInt(0);
2576    const APInt &C1 = N1->getConstantOperandAPInt(0);
2577    return DAG.getVScale(DL, VT, C0 + C1);
2578  }
2579
2580  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2581  if ((N0.getOpcode() == ISD::ADD) &&
2582      (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2583      (N1.getOpcode() == ISD::VSCALE)) {
2584    const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2585    const APInt &VS1 = N1->getConstantOperandAPInt(0);
2586    SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2587    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2588  }
2589
2590  // Fold (add step_vector(c1), step_vector(c2)  to  step_vector(c1+c2))
2591  if (N0.getOpcode() == ISD::STEP_VECTOR &&
2592      N1.getOpcode() == ISD::STEP_VECTOR) {
2593    const APInt &C0 = N0->getConstantOperandAPInt(0);
2594    const APInt &C1 = N1->getConstantOperandAPInt(0);
2595    APInt NewStep = C0 + C1;
2596    return DAG.getStepVector(DL, VT, NewStep);
2597  }
2598
2599  // Fold a + step_vector(c1) + step_vector(c2)  to  a + step_vector(c1+c2)
2600  if ((N0.getOpcode() == ISD::ADD) &&
2601      (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2602      (N1.getOpcode() == ISD::STEP_VECTOR)) {
2603    const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2604    const APInt &SV1 = N1->getConstantOperandAPInt(0);
2605    APInt NewStep = SV0 + SV1;
2606    SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2607    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2608  }
2609
2610  return SDValue();
2611}
2612
// Combines for saturating adds (SADDSAT/UADDSAT): undef propagation, constant
// folding, canonicalization, and lowering UADDSAT to plain ADD when overflow
// is provably impossible.
// NOTE(review): the extraction dropped condition lines here (2629-2630 for the
// canonicalize-constant test, 2638 for the vector zero test, 2648 for the
// no-overflow test). Comments describe only the visible code.
2613SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2614  unsigned Opcode = N->getOpcode();
2615  SDValue N0 = N->getOperand(0);
2616  SDValue N1 = N->getOperand(1);
2617  EVT VT = N0.getValueType();
2618  SDLoc DL(N);
2619
2620  // fold (add_sat x, undef) -> -1
2621  if (N0.isUndef() || N1.isUndef())
2622    return DAG.getAllOnesConstant(DL, VT);
2623
2624  // fold (add_sat c1, c2) -> c3
2625  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2626    return C;
2627
2628  // canonicalize constant to RHS
2631    return DAG.getNode(Opcode, DL, VT, N1, N0);
2632
2633  // fold vector ops
2634  if (VT.isVector()) {
2635    // TODO SimplifyVBinOp
2636
2637    // fold (add_sat x, 0) -> x, vector edition
2639      return N0;
2640  }
2641
2642  // fold (add_sat x, 0) -> x
2643  if (isNullConstant(N1))
2644    return N0;
2645
2646  // If it cannot overflow, transform into an add.
2647  if (Opcode == ISD::UADDSAT)
2649      return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2650
2651  return SDValue();
2652}
2653
// Peel legalization wrappers (TRUNCATE/ZERO_EXTEND/AND-with-1) off a value and
// return the underlying carry-out result (result #1 of ADDCARRY/SUBCARRY/
// UADDO/USUBO) if it is a usable 0/1 boolean; otherwise return empty SDValue.
// NOTE(review): the extraction dropped the signature line (2654) -- this
// appears to be a static helper taking (const TargetLowering &TLI, SDValue V)
// -- and the enum constant on line 2690. TODO confirm against upstream.
2655  bool Masked = false;
2656
2657  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2658  while (true) {
2659    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2660      V = V.getOperand(0);
2661      continue;
2662    }
2663
// An AND with 1 explicitly normalizes the bool, so remember that we saw it.
2664    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2665      Masked = true;
2666      V = V.getOperand(0);
2667      continue;
2668    }
2669
2670    break;
2671  }
2672
2673  // If this is not a carry, return.
2674  if (V.getResNo() != 1)
2675    return SDValue();
2676
2677  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2678      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2679    return SDValue();
2680
2681  EVT VT = V.getNode()->getValueType(0);
2682  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2683    return SDValue();
2684
2685  // If the result is masked, then no matter what kind of bool it is we can
2686  // return. If it isn't, then we need to make sure the bool type is either 0 or
2687  // 1 and not other values.
2688  if (Masked ||
2689      TLI.getBooleanContents(V.getValueType()) ==
2691    return V;
2692
2693  return SDValue();
2694}
2695
2696/// Given the operands of an add/sub operation, see if the 2nd operand is a
2697/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2698/// the opcode and bypass the mask operation.
// NOTE(review): the extraction dropped the first signature line (2699); the
// visible continuation at 2700 shows the trailing parameters (SelectionDAG,
// SDLoc). The missing parameters appear to include IsAdd/N0/N1 used below.
2700                                    SelectionDAG &DAG, const SDLoc &DL) {
2701  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2702    return SDValue();
2703
2704  EVT VT = N0.getValueType();
// All sign bits set means the masked source is known to be 0 or -1.
2705  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2706    return SDValue();
2707
2708  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2709  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2710  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2711}
2712
2713/// Helper for doing combines based on N0 and N1 being added to each other.
// NOTE(review): the extraction dropped the signature continuation (2715) and
// the declaration on 2717 (presumably the SDLoc `DL` used throughout -- TODO
// confirm). Comments describe only the visible code.
2714SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2716  EVT VT = N0.getValueType();
2718
2719  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2720  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2721      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2722    return DAG.getNode(ISD::SUB, DL, VT, N0,
2723                       DAG.getNode(ISD::SHL, DL, VT,
2724                                   N1.getOperand(0).getOperand(1),
2725                                   N1.getOperand(1)));
2726
2727  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2728    return V;
2729
2730  // Look for:
2731  //   add (add x, 1), y
2732  // And if the target does not like this form then turn into:
2733  //   sub y, (xor x, -1)
2734  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2735      N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2736    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2737                              DAG.getAllOnesConstant(DL, VT));
2738    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2739  }
2740
2741  // Hoist one-use subtraction by non-opaque constant:
2742  //   (x - C) + y  ->  (x + y) - C
2743  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2744  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2745      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2746    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2747    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2748  }
2749  // Hoist one-use subtraction from non-opaque constant:
2750  //   (C - x) + y  ->  (y - x) + C
2751  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2752      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2753    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2754    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2755  }
2756
2757  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2758  // rather than 'add 0/-1' (the zext should get folded).
2759  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2760  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2761      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2763    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2764    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2765  }
2766
2767  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2768  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2769    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2770    if (TN->getVT() == MVT::i1) {
2771      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2772                                 DAG.getConstant(1, DL, VT));
2773      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2774    }
2775  }
2776
2777  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2778  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2779      N1.getResNo() == 0)
2780    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2781                       N0, N1.getOperand(0), N1.getOperand(2));
2782
2783  // (add X, Carry) -> (addcarry X, 0, Carry)
// (The legality check for ADDCARRY sat on missing line 2784.)
2785    if (SDValue Carry = getAsCarry(TLI, N1))
2786      return DAG.getNode(ISD::ADDCARRY, DL,
2787                         DAG.getVTList(VT, Carry.getValueType()), N0,
2788                         DAG.getConstant(0, DL, VT), Carry);
2789
2790  return SDValue();
2791}
2792
// Combines for the legacy glue-based ADDC node: drop the carry when unused,
// canonicalize, fold add-of-zero, and lower to plain ADD when overflow is
// impossible.
// NOTE(review): the extraction dropped lines 2802 (the CARRY_FALSE value in
// the first CombineTo), 2805-2806 (the N0C/N1C declarations used at 2807),
// and 2816/2818 (the no-overflow check and its CARRY_FALSE result).
2793SDValue DAGCombiner::visitADDC(SDNode *N) {
2794  SDValue N0 = N->getOperand(0);
2795  SDValue N1 = N->getOperand(1);
2796  EVT VT = N0.getValueType();
2797  SDLoc DL(N);
2798
2799  // If the flag result is dead, turn this into an ADD.
2800  if (!N->hasAnyUseOfValue(1))
2801    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2803
2804  // canonicalize constant to RHS.
2807  if (N0C && !N1C)
2808    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2809
2810  // fold (addc x, 0) -> x + no carry out
2811  if (isNullConstant(N1))
2812    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2813                                        DL, MVT::Glue));
2814
2815  // If it cannot overflow, transform into an add.
2817    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2819
2820  return SDValue();
2821}
2822
2823/**
2824 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2825 * then the flip also occurs if computing the inverse is the same cost.
2826 * This function returns an empty SDValue in case it cannot flip the boolean
2827 * without increasing the cost of the computation. If you want to flip a boolean
2828 * no matter what, use DAG.getLogicalNOT.
2829 */
// NOTE(review): the extraction dropped the first signature line (2830, which
// presumably declared the SDValue V and SelectionDAG parameters) and the three
// case labels of the BooleanContents switch (2847/2850/2853). Comments
// describe only the visible code.
2831                                  const TargetLowering &TLI,
2832                                  bool Force) {
// Constants can always be flipped for free via getLogicalNOT.
2833  if (Force && isa<ConstantSDNode>(V))
2834    return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2835
2836  if (V.getOpcode() != ISD::XOR)
2837    return SDValue();
2838
2839  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2840  if (!Const)
2841    return SDValue();
2842
2843  EVT VT = V.getValueType();
2844
// Whether the XOR constant is exactly the target's "true" value, i.e. the XOR
// is a logical NOT; the case labels naming each BooleanContents variant sat on
// the missing lines.
2845  bool IsFlip = false;
2846  switch(TLI.getBooleanContents(VT)) {
2848      IsFlip = Const->isOne();
2849      break;
2851      IsFlip = Const->isAllOnes();
2852      break;
2854      IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2855      break;
2856  }
2857
2858  if (IsFlip)
2859    return V.getOperand(0);
2860  if (Force)
2861    return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2862  return SDValue();
2863}
2864
// Combines for overflow-reporting adds (SADDO/UADDO): drop the overflow flag
// when unused, canonicalize, fold add-of-zero, and for the unsigned case lower
// to ADD when overflow is impossible and invert not-plus-one patterns.
// NOTE(review): the extraction dropped lines 2880-2881 (the canonicalization
// condition) and 2890 (the no-overflow check guarding the CombineTo at 2891).
2865SDValue DAGCombiner::visitADDO(SDNode *N) {
2866  SDValue N0 = N->getOperand(0);
2867  SDValue N1 = N->getOperand(1);
2868  EVT VT = N0.getValueType();
2869  bool IsSigned = (ISD::SADDO == N->getOpcode());
2870
2871  EVT CarryVT = N->getValueType(1);
2872  SDLoc DL(N);
2873
2874  // If the flag result is dead, turn this into an ADD.
2875  if (!N->hasAnyUseOfValue(1))
2876    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2877                     DAG.getUNDEF(CarryVT));
2878
2879  // canonicalize constant to RHS.
2882    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2883
2884  // fold (addo x, 0) -> x + no carry out
2885  if (isNullOrNullSplat(N1))
2886    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2887
2888  if (!IsSigned) {
2889    // If it cannot overflow, transform into an add.
2891      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2892                       DAG.getConstant(0, DL, CarryVT));
2893
2894    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2895    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2896      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2897                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
// The borrow of the USUBO is the logical inverse of the original carry.
2898      return CombineTo(
2899          N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2900    }
2901
// Try both operand orders for the UADDO-specific folds.
2902    if (SDValue Combined = visitUADDOLike(N0, N1, N))
2903      return Combined;
2904
2905    if (SDValue Combined = visitUADDOLike(N1, N0, N))
2906      return Combined;
2907  }
2908
2909  return SDValue();
2910}
2911
// UADDO-specific folds that absorb a carry operand into an ADDCARRY node
// (scalar only).
// NOTE(review): the extraction dropped line 2922 (the overflow check using the
// One constant built at 2921) and line 2928 (the legality guard for the final
// ADDCARRY fold).
2912SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2913  EVT VT = N0.getValueType();
2914  if (VT.isVector())
2915    return SDValue();
2916
2917  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2918  // If Y + 1 cannot overflow.
2919  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2920    SDValue Y = N1.getOperand(0);
2921    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2923      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2924                         N1.getOperand(2));
2925  }
2926
2927  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2929    if (SDValue Carry = getAsCarry(TLI, N1))
2930      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2931                         DAG.getConstant(0, SDLoc(N), VT), Carry);
2932
2933  return SDValue();
2934}
2935
// Combines for the legacy glue-based ADDE node: canonicalize constant to RHS
// and degrade to ADDC when the incoming carry is known false.
// NOTE(review): the extraction dropped lines 2942-2943 (the N0C/N1C constant
// matches used at 2944).
2936SDValue DAGCombiner::visitADDE(SDNode *N) {
2937  SDValue N0 = N->getOperand(0);
2938  SDValue N1 = N->getOperand(1);
2939  SDValue CarryIn = N->getOperand(2);
2940
2941  // canonicalize constant to RHS
2944  if (N0C && !N1C)
2945    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2946                       N1, N0, CarryIn);
2947
2948  // fold (adde x, y, false) -> (addc x, y)
2949  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2950    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2951
2952  return SDValue();
2953}
2954
// Combines for ADDCARRY: canonicalize, degrade to UADDO when carry-in is
// false, materialize the carry when both addends are zero, then try the
// operand-order-sensitive ADDCARRY-specific folds.
// NOTE(review): the extraction dropped lines 2962-2963 (the N0C/N1C matches
// used at 2964), 2978 (the extend/truncate of CarryIn into CarryExt), and
// 2985/2988 (the calls whose results are returned at 2986/2989, presumably
// visitADDCARRYLike in both operand orders -- TODO confirm).
2955SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2956  SDValue N0 = N->getOperand(0);
2957  SDValue N1 = N->getOperand(1);
2958  SDValue CarryIn = N->getOperand(2);
2959  SDLoc DL(N);
2960
2961  // canonicalize constant to RHS
2964  if (N0C && !N1C)
2965    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2966
2967  // fold (addcarry x, y, false) -> (uaddo x, y)
2968  if (isNullConstant(CarryIn)) {
2969    if (!LegalOperations ||
2970        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2971      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2972  }
2973
2974  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2975  if (isNullConstant(N0) && isNullConstant(N1)) {
2976    EVT VT = N0.getValueType();
2977    EVT CarryVT = CarryIn.getValueType();
2979    AddToWorklist(CarryExt.getNode());
2980    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2981                                    DAG.getConstant(1, DL, VT)),
2982                     DAG.getConstant(0, DL, CarryVT));
2983  }
2984
2986    return Combined;
2987
2989    return Combined;
2990
2991  return SDValue();
2992}
2993
// Combines for SADDO_CARRY: canonicalize constant to RHS and degrade to SADDO
// when the incoming carry is known false.
// NOTE(review): the extraction dropped lines 3001-3002 (the N0C/N1C constant
// matches used at 3003).
2994SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2995  SDValue N0 = N->getOperand(0);
2996  SDValue N1 = N->getOperand(1);
2997  SDValue CarryIn = N->getOperand(2);
2998  SDLoc DL(N);
2999
3000  // canonicalize constant to RHS
3003  if (N0C && !N1C)
3004    return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3005
3006  // fold (saddo_carry x, y, false) -> (saddo x, y)
3007  if (isNullConstant(CarryIn)) {
3008    if (!LegalOperations ||
3009        TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3010      return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3011  }
3012
3013  return SDValue();
3014}
3015
3016/**
3017 * If we are facing some sort of diamond carry propapagtion pattern try to
3018 * break it up to generate something like:
3019 *   (addcarry X, 0, (addcarry A, B, Z):Carry)
3020 *
3021 * The end result is usually an increase in operation required, but because the
3022 * carry is now linearized, other tranforms can kick in and optimize the DAG.
3023 *
3024 * Patterns typically look something like
3025 *            (uaddo A, B)
3026 *             /       \
3027 *          Carry      Sum
3028 *            |          \
3029 *            | (addcarry *, 0, Z)
3030 *            |       /
3031 *             \   Carry
3032 *              |   /
3033 * (addcarry X, *, *)
3034 *
3035 * But numerous variation exist. Our goal is to identify A, B, X and Z and
3036 * produce a combine with a single path for carry propagation.
3037 */
// NOTE(review): the extraction dropped the signature lines (3038-3039); the
// visible tail at 3040 is `SDNode *N) {`. The parameters referenced below
// (Combiner, DAG, X, Carry0, Carry1) must come from that missing signature.
3040                                   SDNode *N) {
3041  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3042    return SDValue();
3043  if (Carry1.getOpcode() != ISD::UADDO)
3044    return SDValue();
3045
3046  SDValue Z;
3047
3048  /**
3049   * First look for a suitable Z. It will present itself in the form of
3050   * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3051   */
3052  if (Carry0.getOpcode() == ISD::ADDCARRY &&
3053      isNullConstant(Carry0.getOperand(1))) {
3054    Z = Carry0.getOperand(2);
3055  } else if (Carry0.getOpcode() == ISD::UADDO &&
3056             isOneConstant(Carry0.getOperand(1))) {
// (uaddo Y, 1) adds a constant-true carry, so Z is the constant 1 in the
// setcc result type.
3057    EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3058    Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3059  } else {
3060    // We couldn't find a suitable Z.
3061    return SDValue();
3062  }
3063
3064
// Rewrite the diamond as (addcarry X, 0, (addcarry A, B, Z):Carry).
3065  auto cancelDiamond = [&](SDValue A,SDValue B) {
3066    SDLoc DL(N);
3067    SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3068    Combiner.AddToWorklist(NewY.getNode());
3069    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3070                       DAG.getConstant(0, DL, X.getValueType()),
3071                       NewY.getValue(1));
3072  };
3073
3074  /**
3075   * (uaddo A, B)
3076   *  |
3077   * Sum
3078   *  |
3079   * (addcarry *, 0, Z)
3080   */
3081  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3082    return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3083  }
3084
3085  /**
3086   * (addcarry A, 0, Z)
3087   *  |
3088   * Sum
3089   *  |
3090   * (uaddo *, B)
3091   */
3092  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3093    return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3094  }
3095
3096  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3097    return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3098  }
3099
3100  return SDValue();
3101}
3102
3103// If we are facing some sort of diamond carry/borrow in/out pattern try to
3104// match patterns like:
3105//
3106// (uaddo A, B) CarryIn
3107// | \ |
3108// | \ |
3109// PartialSum PartialCarryOutX /
3110// | | /
3111// | ____|____________/
3112// | / |
3113// (uaddo *, *) \________
3114// | \ \
3115// | \ |
3116// | PartialCarryOutY |
3117// | \ |
3118// | \ /
3119// AddCarrySum | ______/
3120// | /
3121// CarryOut = (or *, *)
3122//
3123// And generate ADDCARRY (or SUBCARRY) with two result values:
3124//
3125// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3126//
3127// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3128// a single path for carry/borrow out propagation:
3131 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3132 return SDValue();
3133 unsigned Opcode = Carry0.getOpcode();
3134 if (Opcode != Carry1.getOpcode())
3135 return SDValue();
3136 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3137 return SDValue();
3138
3139 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3140 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3141 // the above ASCII art.)
3142 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3143 Carry1.getOperand(1) != Carry0.getValue(0))
3145 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3146 Carry1.getOperand(1) != Carry0.getValue(0))
3147 return SDValue();
3148
3149 // The carry in value must be on the righthand side for subtraction.
3150 unsigned CarryInOperandNum =
3151 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3152 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3153 return SDValue();
3155
3156 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3157 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3158 return SDValue();
3159
3160 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3161 // TODO: make getAsCarry() aware of how partial carries are merged.
3162 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3163 return SDValue();
3164 CarryIn = CarryIn.getOperand(0);
3165 if (CarryIn.getValueType() != MVT::i1)
3166 return SDValue();
3167
3168 SDLoc DL(N);
3169 SDValue Merged =
3170 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3171 Carry0.getOperand(1), CarryIn);
3172
3173 // Please note that because we have proven that the result of the UADDO/USUBO
3174 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3175 // therefore prove that if the first UADDO/USUBO overflows, the second
3176 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3177 // maximum value.
3178 //
3179 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3180 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3181 //
3182 // This is important because it means that OR and XOR can be used to merge
3183 // carry flags; and that AND can return a constant zero.
3184 //
3185 // TODO: match other operations that can merge flags (ADD, etc)
3186 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3187 if (N->getOpcode() == ISD::AND)
3188 return DAG.getConstant(0, DL, MVT::i1);
3189 return Merged.getValue(1);
3190}
3191
3192SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3193 SDNode *N) {
3194 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3195 if (isBitwiseNot(N0))
3196 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3197 SDLoc DL(N);
3198 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3199 N0.getOperand(0), NotC);
3200 return CombineTo(
3201 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3202 }
3203
3204 // Iff the flag result is dead:
3205 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3206 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3207 // or the dependency between the instructions.
3208 if ((N0.getOpcode() == ISD::ADD ||
3209 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3210 N0.getValue(1) != CarryIn)) &&
3211 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3212 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3213 N0.getOperand(0), N0.getOperand(1), CarryIn);
3214
3215 /**
3216 * When one of the addcarry argument is itself a carry, we may be facing
3217 * a diamond carry propagation. In which case we try to transform the DAG
3218 * to ensure linear carry propagation if that is possible.
3219 */
3220 if (auto Y = getAsCarry(TLI, N1)) {
3221 // Because both are carries, Y and Z can be swapped.
3222 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3223 return R;
3224 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3225 return R;
3226 }
3227
3228 return SDValue();
3229}
3230
3231// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3232// clamp/truncation if necessary.
3234 SDValue RHS, SelectionDAG &DAG,
3235 const SDLoc &DL) {
3236 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3237 "Illegal truncation");
3238
3239 if (DstVT == SrcVT)
3240 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3241
3242 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3243 // clamping RHS.
3244 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3245 DstVT.getScalarSizeInBits());
3246 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3247 return SDValue();
3248
3250 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3251 DstVT.getScalarSizeInBits()),
3252 DL, SrcVT);
3253 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3254 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3255 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3256 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3257}
3258
3259// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3260// usubsat(a,b), optionally as a truncated type.
3261SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3262 if (N->getOpcode() != ISD::SUB ||
3263 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3264 return SDValue();
3265
3266 EVT SubVT = N->getValueType(0);
3267 SDValue Op0 = N->getOperand(0);
3268 SDValue Op1 = N->getOperand(1);
3269
3270 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3271 // they may be converted to usubsat(a,b).
3272 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3273 SDValue MaxLHS = Op0.getOperand(0);
3274 SDValue MaxRHS = Op0.getOperand(1);
3275 if (MaxLHS == Op1)
3276 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3277 if (MaxRHS == Op1)
3278 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3279 }
3280
3281 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3282 SDValue MinLHS = Op1.getOperand(0);
3283 SDValue MinRHS = Op1.getOperand(1);
3284 if (MinLHS == Op0)
3285 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3286 if (MinRHS == Op0)
3287 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3288 }
3289
3290 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3291 if (Op1.getOpcode() == ISD::TRUNCATE &&
3292 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3293 Op1.getOperand(0).hasOneUse()) {
3294 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3295 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3296 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3297 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3298 DAG, SDLoc(N));
3299 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3300 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3301 DAG, SDLoc(N));
3302 }
3303
3304 return SDValue();
3305}
3306
3307// Since it may not be valid to emit a fold to zero for vector initializers
3308// check if we can before folding.
3309static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3310 SelectionDAG &DAG, bool LegalOperations) {
3311 if (!VT.isVector())
3312 return DAG.getConstant(0, DL, VT);
3313 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3314 return DAG.getConstant(0, DL, VT);
3315 return SDValue();
3316}
3317
3318SDValue DAGCombiner::visitSUB(SDNode *N) {
3319 SDValue N0 = N->getOperand(0);
3320 SDValue N1 = N->getOperand(1);
3321 EVT VT = N0.getValueType();
3322 SDLoc DL(N);
3323
3324 // fold (sub x, x) -> 0
3325 // FIXME: Refactor this and xor and other similar operations together.
3326 if (N0 == N1)
3327 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3328
3329 // fold (sub c1, c2) -> c3
3330 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3331 return C;
3332
3333 // fold vector ops
3334 if (VT.isVector()) {
3336 return FoldedVOp;
3337
3338 // fold (sub x, 0) -> x, vector edition
3340 return N0;
3341 }
3342
3344 return NewSel;
3345
3347
3348 // fold (sub x, c) -> (add x, -c)
3349 if (N1C) {
3350 return DAG.getNode(ISD::ADD, DL, VT, N0,
3351 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3352 }
3353
3354 if (isNullOrNullSplat(N0)) {
3355 unsigned BitWidth = VT.getScalarSizeInBits();
3356 // Right-shifting everything out but the sign bit followed by negation is
3357 // the same as flipping arithmetic/logical shift type without the negation:
3358 // -(X >>u 31) -> (X >>s 31)
3359 // -(X >>s 31) -> (X >>u 31)
3360 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3361 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3362 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3363 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3364 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3365 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3366 }
3367 }
3368
3369 // 0 - X --> 0 if the sub is NUW.
3370 if (N->getFlags().hasNoUnsignedWrap())
3371 return N0;
3372
3374 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3375 // N1 must be 0 because negating the minimum signed value is undefined.
3376 if (N->getFlags().hasNoSignedWrap())
3377 return N0;
3378
3379 // 0 - X --> X if X is 0 or the minimum signed value.
3380 return N1;
3381 }
3382
3383 // Convert 0 - abs(x).
3384 if (N1->getOpcode() == ISD::ABS &&
3386 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3387 return Result;
3388
3389 // Fold neg(splat(neg(x)) -> splat(x)
3390 if (VT.isVector()) {
3391 SDValue N1S = DAG.getSplatValue(N1, true);
3392 if (N1S && N1S.getOpcode() == ISD::SUB &&
3393 isNullConstant(N1S.getOperand(0))) {
3394 if (VT.isScalableVector())
3395 return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3396 return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3397 }
3398 }
3399 }
3400
3401 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3403 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3404
3405 // fold (A - (0-B)) -> A+B
3406 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3407 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3408
3409 // fold A-(A-B) -> B
3410 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3411 return N1.getOperand(1);
3412
3413 // fold (A+B)-A -> B
3414 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3415 return N0.getOperand(1);
3416
3417 // fold (A+B)-B -> A
3418 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3419 return N0.getOperand(0);
3420
3421 // fold (A+C1)-C2 -> A+(C1-C2)
3422 if (N0.getOpcode() == ISD::ADD &&
3423 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3424 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3425 SDValue NewC =
3426 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3427 assert(NewC && "Constant folding failed");
3428 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3429 }
3430
3431 // fold C2-(A+C1) -> (C2-C1)-A
3432 if (N1.getOpcode() == ISD::ADD) {
3433 SDValue N11 = N1.getOperand(1);
3434 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3435 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3436 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3437 assert(NewC && "Constant folding failed");
3438 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3439 }
3440 }
3441
3442 // fold (A-C1)-C2 -> A-(C1+C2)
3443 if (N0.getOpcode() == ISD::SUB &&
3444 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3445 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3446 SDValue NewC =
3447 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3448 assert(NewC && "Constant folding failed");
3449 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3450 }
3451
3452 // fold (c1-A)-c2 -> (c1-c2)-A
3453 if (N0.getOpcode() == ISD::SUB &&
3454 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3455 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3456 SDValue NewC =
3457 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3458 assert(NewC && "Constant folding failed");
3459 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3460 }
3461
3462 // fold ((A+(B+or-C))-B) -> A+or-C
3463 if (N0.getOpcode() == ISD::ADD &&
3464 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3465 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3466 N0.getOperand(1).getOperand(0) == N1)
3467 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3468 N0.getOperand(1).getOperand(1));
3469
3470 // fold ((A+(C+B))-B) -> A+C
3471 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3472 N0.getOperand(1).getOperand(1) == N1)
3473 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3474 N0.getOperand(1).getOperand(0));
3475
3476 // fold ((A-(B-C))-C) -> A-B
3477 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3478 N0.getOperand(1).getOperand(1) == N1)
3479 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3480 N0.getOperand(1).getOperand(0));
3481
3482 // fold (A-(B-C)) -> A+(C-B)
3483 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3484 return DAG.getNode(ISD::ADD, DL, VT, N0,
3485 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3486 N1.getOperand(0)));
3487
3488 // A - (A & B) -> A & (~B)
3489 if (N1.getOpcode() == ISD::AND) {
3490 SDValue A = N1.getOperand(0);
3491 SDValue B = N1.getOperand(1);
3492 if (A != N0)
3493 std::swap(A, B);
3494 if (A == N0 &&
3495 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3496 SDValue InvB =
3497 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3498 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3499 }
3500 }
3501
3502 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3503 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3504 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3505 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3506 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3507 N1.getOperand(0).getOperand(1),
3508 N1.getOperand(1));
3509 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3510 }
3511 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3512 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3513 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3514 N1.getOperand(0),
3515 N1.getOperand(1).getOperand(1));
3516 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3517 }
3518 }
3519
3520 // If either operand of a sub is undef, the result is undef
3521 if (N0.isUndef())
3522 return N0;
3523 if (N1.isUndef())
3524 return N1;
3525
3526 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3527 return V;
3528
3529 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3530 return V;
3531
3532 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3533 return V;
3534
3535 if (SDValue V = foldSubToUSubSat(VT, N))
3536 return V;
3537
3538 // (x - y) - 1 -> add (xor y, -1), x
3539 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3540 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3541 DAG.getAllOnesConstant(DL, VT));
3542 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3543 }
3544
3545 // Look for:
3546 // sub y, (xor x, -1)
3547 // And if the target does not like this form then turn into:
3548 // add (add x, y), 1
3549 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3550 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3551 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3552 }
3553
3554 // Hoist one-use addition by non-opaque constant:
3555 // (x + C) - y -> (x - y) + C
3556 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3557 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3558 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3559 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3560 }
3561 // y - (x + C) -> (y - x) - C
3562 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3563 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3564 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3565 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3566 }
3567 // (x - C) - y -> (x - y) - C
3568 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3569 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3570 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3571 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3572 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3573 }
3574 // (C - x) - y -> C - (x + y)
3575 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3576 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3577 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3578 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3579 }
3580
3581 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3582 // rather than 'sub 0/1' (the sext should get folded).
3583 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3584 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3585 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3586 TLI.getBooleanContents(VT) ==
3588 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3589 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3590 }
3591
3592 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3593 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3594 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3595 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3596 SDValue S0 = N1.getOperand(0);
3597 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3598 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3599 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3600 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3601 }
3602 }
3603
3604 // If the relocation model supports it, consider symbol offsets.
3606 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3607 // fold (sub Sym, c) -> Sym-c
3608 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3609 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3610 GA->getOffset() -
3611 (uint64_t)N1C->getSExtValue());
3612 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3614 if (GA->getGlobal() == GB->getGlobal())
3615 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3616 DL, VT);
3617 }
3618
3619 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3620 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3621 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3622 if (TN->getVT() == MVT::i1) {
3623 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3624 DAG.getConstant(1, DL, VT));
3625 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3626 }
3627 }
3628
3629 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3630 if (N1.getOpcode() == ISD::VSCALE) {
3631 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3632 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3633 }
3634
3635 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3636 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3637 APInt NewStep = -N1.getConstantOperandAPInt(0);
3638 return DAG.getNode(ISD::ADD, DL, VT, N0,
3639 DAG.getStepVector(DL, VT, NewStep));
3640 }
3641
3642 // Prefer an add for more folding potential and possibly better codegen:
3643 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3644 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3645 SDValue ShAmt = N1.getOperand(1);
3647 if (ShAmtC &&
3648 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3649 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3650 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3651 }
3652 }
3653
3655 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3656 if (SDValue Carry = getAsCarry(TLI, N0)) {
3657 SDValue X = N1;
3658 SDValue Zero = DAG.getConstant(0, DL, VT);
3659 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3660 return DAG.getNode(ISD::ADDCARRY, DL,
3661 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3662 Carry);
3663 }
3664 }
3665
3666 return SDValue();
3667}
3668
3669SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3670 SDValue N0 = N->getOperand(0);
3671 SDValue N1 = N->getOperand(1);
3672 EVT VT = N0.getValueType();
3673 SDLoc DL(N);
3674
3675 // fold (sub_sat x, undef) -> 0
3676 if (N0.isUndef() || N1.isUndef())
3677 return DAG.getConstant(0, DL, VT);
3678
3679 // fold (sub_sat x, x) -> 0
3680 if (N0 == N1)
3681 return DAG.getConstant(0, DL, VT);
3682
3683 // fold (sub_sat c1, c2) -> c3
3684 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3685 return C;
3686
3687 // fold vector ops
3688 if (VT.isVector()) {
3689 // TODO SimplifyVBinOp
3690
3691 // fold (sub_sat x, 0) -> x, vector edition
3693 return N0;
3694 }
3695
3696 // fold (sub_sat x, 0) -> x
3697 if (isNullConstant(N1))
3698 return N0;
3699
3700 return SDValue();
3701}
3702
3703SDValue DAGCombiner::visitSUBC(SDNode *N) {
3704 SDValue N0 = N->getOperand(0);
3705 SDValue N1 = N->getOperand(1);
3706 EVT VT = N0.getValueType();
3707 SDLoc DL(N);
3708
3709 // If the flag result is dead, turn this into an SUB.
3710 if (!N->hasAnyUseOfValue(1))
3711 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3713
3714 // fold (subc x, x) -> 0 + no borrow
3715 if (N0 == N1)
3716 return CombineTo(N, DAG.getConstant(0, DL, VT),
3718
3719 // fold (subc x, 0) -> x + no borrow
3720 if (isNullConstant(N1))
3721 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3722
3723 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3724 if (isAllOnesConstant(N0))
3725 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3727
3728 return SDValue();
3729}
3730
3731SDValue DAGCombiner::visitSUBO(SDNode *N) {
3732 SDValue N0 = N->getOperand(0);
3733 SDValue N1 = N->getOperand(1);
3734 EVT VT = N0.getValueType();
3735 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3736
3737 EVT CarryVT = N->getValueType(1);
3738 SDLoc DL(N);
3739
3740 // If the flag result is dead, turn this into an SUB.
3741 if (!N->hasAnyUseOfValue(1))
3742 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3743 DAG.getUNDEF(CarryVT));
3744
3745 // fold (subo x, x) -> 0 + no borrow
3746 if (N0 == N1)
3747 return CombineTo(N, DAG.getConstant(0, DL, VT),
3748 DAG.getConstant(0, DL, CarryVT));
3749
3751
3752 // fold (subox, c) -> (addo x, -c)
3753 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3754 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3755 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3756 }
3757
3758 // fold (subo x, 0) -> x + no borrow
3759 if (isNullOrNullSplat(N1))
3760 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3761
3762 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3763 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3764 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3765 DAG.getConstant(0, DL, CarryVT));
3766
3767 return SDValue();
3768}
3769
3770SDValue DAGCombiner::visitSUBE(SDNode *N) {
3771 SDValue N0 = N->getOperand(0);
3772 SDValue N1 = N->getOperand(1);
3773 SDValue CarryIn = N->getOperand(2);
3774
3775 // fold (sube x, y, false) -> (subc x, y)
3776 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3777 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3778
3779 return SDValue();
3780}
3781
3782SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3783 SDValue N0 = N->getOperand(0);
3784 SDValue N1 = N->getOperand(1);
3785 SDValue CarryIn = N->getOperand(2);
3786
3787 // fold (subcarry x, y, false) -> (usubo x, y)
3788 if (isNullConstant(CarryIn)) {
3789 if (!LegalOperations ||
3790 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3791 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3792 }
3793
3794 return SDValue();
3795}
3796
3797SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3798 SDValue N0 = N->getOperand(0);
3799 SDValue N1 = N->getOperand(1);
3800 SDValue CarryIn = N->getOperand(2);
3801
3802 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3803 if (isNullConstant(CarryIn)) {
3804 if (!LegalOperations ||
3805 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3806 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3807 }
3808
3809 return SDValue();
3810}
3811
3812// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3813// UMULFIXSAT here.
3814SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3815 SDValue N0 = N->getOperand(0);
3816 SDValue N1 = N->getOperand(1);
3817 SDValue Scale = N->getOperand(2);
3818 EVT VT = N0.getValueType();
3819
3820 // fold (mulfix x, undef, scale) -> 0
3821 if (N0.isUndef() || N1.isUndef())
3822 return DAG.getConstant(0, SDLoc(N), VT);
3823
3824 // Canonicalize constant to RHS (vector doesn't have to splat)
3827 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3828
3829 // fold (mulfix x, 0, scale) -> 0
3830 if (isNullConstant(N1))
3831 return DAG.getConstant(0, SDLoc(N), VT);
3832
3833 return SDValue();
3834}
3835
3836SDValue DAGCombiner::visitMUL(SDNode *N) {
3837 SDValue N0 = N->getOperand(0);
3838 SDValue N1 = N->getOperand(1);
3839 EVT VT = N0.getValueType();
3840
3841 // fold (mul x, undef) -> 0
3842 if (N0.isUndef() || N1.isUndef())
3843 return DAG.getConstant(0, SDLoc(N), VT);
3844
3845 // fold (mul c1, c2) -> c1*c2
3846 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3847 return C;
3848
3849 // canonicalize constant to RHS (vector doesn't have to splat)
3852 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3853
3854 bool N1IsConst = false;
3855 bool N1IsOpaqueConst = false;
3857
3858 // fold vector ops
3859 if (VT.isVector()) {
3861 return FoldedVOp;
3862
3864 assert((!N1IsConst ||
3865 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3866 "Splat APInt should be element width");
3867 } else {
3869 if (N1IsConst) {
3870 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3872 }
3873 }
3874
3875 // fold (mul x, 0) -> 0
3876 if (N1IsConst && ConstValue1.isZero())
3877 return N1;
3878
3879 // fold (mul x, 1) -> x
3880 if (N1IsConst && ConstValue1.isOne())
3881 return N0;
3882
3884 return NewSel;
3885
3886 // fold (mul x, -1) -> 0-x
3887 if (N1IsConst && ConstValue1.isAllOnes()) {
3888 SDLoc DL(N);
3889 return DAG.getNode(ISD::SUB, DL, VT,
3890 DAG.getConstant(0, DL, VT), N0);
3891 }
3892
3893 // fold (mul x, (1 << c)) -> x << c
3894 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3896 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3897 SDLoc DL(N);
3900 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3901 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3902 }
3903
3904 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3905 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
3906 unsigned Log2Val = (-ConstValue1).logBase2();
3907 SDLoc DL(N);
3908 // FIXME: If the input is something that is easily negated (e.g. a
3909 // single-use add), we should put the negate there.
3910 return DAG.getNode(ISD::SUB, DL, VT,
3911 DAG.getConstant(0, DL, VT),
3912 DAG.getNode(ISD::SHL, DL, VT, N0,
3913 DAG.getConstant(Log2Val, DL,
3915 }
3916
3917 // Try to transform:
3918 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3919 // mul x, (2^N + 1) --> add (shl x, N), x
3920 // mul x, (2^N - 1) --> sub (shl x, N), x
3921 // Examples: x * 33 --> (x << 5) + x
3922 // x * 15 --> (x << 4) - x
3923 // x * -33 --> -((x << 5) + x)
3924 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3925 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3926 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3927 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3928 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3929 // x * 0xf800 --> (x << 16) - (x << 11)
3930 // x * -0x8800 --> -((x << 15) + (x << 11))
3931 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3932 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3933 // TODO: We could handle more general decomposition of any constant by
3934 // having the target set a limit on number of ops and making a
3935 // callback to determine that sequence (similar to sqrt expansion).
3936 unsigned MathOp = ISD::DELETED_NODE;
3937 APInt MulC = ConstValue1.abs();
3938 // The constant `2` should be treated as (2^0 + 1).
3939 unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3940 MulC.lshrInPlace(TZeros);
3941 if ((MulC - 1).isPowerOf2())
3942 MathOp = ISD::ADD;
3943 else if ((MulC + 1).isPowerOf2())
3944 MathOp = ISD::SUB;
3945
3946 if (MathOp != ISD::DELETED_NODE) {
3947 unsigned ShAmt =
3948 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3949 ShAmt += TZeros;
3950 assert(ShAmt < VT.getScalarSizeInBits() &&
3951 "multiply-by-constant generated out of bounds shift");
3952 SDLoc DL(N);
3953 SDValue Shl =
3954 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3955 SDValue R =
3956 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3957 DAG.getNode(ISD::SHL, DL, VT, N0,
3958 DAG.getConstant(TZeros, DL, VT)))
3959 : DAG.getNode(MathOp, DL, VT, Shl, N0);
3960 if (ConstValue1.isNegative())
3961 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3962 return R;
3963 }
3964 }
3965
3966 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3967 if (N0.getOpcode() == ISD::SHL &&
3968 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3969 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3970 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3972 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3973 }
3974
3975 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3976 // use.
3977 {
3978 SDValue Sh, Y;
3979
3980 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3981 if (N0.getOpcode() == ISD::SHL &&
3983 N0.getNode()->hasOneUse()) {
3984 Sh = N0; Y = N1;
3985 } else if (N1.getOpcode() == ISD::SHL &&
3986 isConstantOrConstantVector(N1.getOperand(1)) &&
3987 N1.getNode()->hasOneUse()) {
3988 Sh = N1; Y = N0;
3989 }
3990
3991 if (Sh.getNode()) {
3992 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3993 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3994 }
3995 }
3996
3997 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3999 N0.getOpcode() == ISD::ADD &&
4002 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
4003 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
4004 N0.getOperand(0), N1),
4005 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
4006 N0.getOperand(1), N1));
4007
4008 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4009 if (N0.getOpcode() == ISD::VSCALE)
4011 const APInt &C0 = N0.getConstantOperandAPInt(0);
4012 const APInt &C1 = NC1->getAPIntValue();
4013 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
4014 }
4015
4016 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4017 APInt MulVal;
4018 if (N0.getOpcode() == ISD::STEP_VECTOR)
4019 if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4020 const APInt &C0 = N0.getConstantOperandAPInt(0);
4021 APInt NewStep = C0 * MulVal;
4022 return DAG.getStepVector(SDLoc(N), VT, NewStep);
4023 }
4024
4025 // Fold ((mul x, 0/undef) -> 0,
4026 // (mul x, 1) -> x) -> x)
4027 // -> and(x, mask)
4028 // We can replace vectors with '0' and '1' factors with a clearing mask.
4029 if (VT.isFixedLengthVector()) {
4030 unsigned NumElts = VT.getVectorNumElements();
4032 ClearMask.reserve(NumElts);
4033 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4034 if (!V || V->isZero()) {
4035 ClearMask.push_back(true);
4036 return true;
4037 }
4038 ClearMask.push_back(false);
4039 return V->isOne();
4040 };
4041 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4042 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4043 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4044 SDLoc DL(N);
4045 EVT LegalSVT = N1.getOperand(0).getValueType();
4046 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4047 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4049 for (unsigned I = 0; I != NumElts; ++I)
4050 if (ClearMask[I])
4051 Mask[I] = Zero;
4052 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4053 }
4054 }
4055
4056 // reassociate mul
4057 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
4058 return RMUL;
4059
4060 return SDValue();
4061}
4062
// Decides, by scalar integer type, whether the target exposes a combined
// divmod libcall for this node's result type.
4063 /// Return true if divmod libcall is available.
4064 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4065 const TargetLowering &TLI) {
// NOTE(review): the extracted listing drops source line 4066 here —
// presumably the declaration "RTLIB::Libcall LC;" assigned in the switch
// below. Verify against the full source.
4067 EVT NodeType = Node->getValueType(0);
4068 if (!NodeType.isSimple())
4069 return false;
// Map the scalar integer type to the matching signed/unsigned DIVREM call.
4070 switch (NodeType.getSimpleVT().SimpleTy) {
4071 default: return false; // No libcall for vector types.
4072 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4073 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4074 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4075 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4076 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4077 }
4078
// Available iff the target registered a name for the selected libcall.
4079 return TLI.getLibcallName(LC) != nullptr;
4080}
4081
4082 /// Issue divrem if both quotient and remainder are needed.
4083 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4084 if (Node->use_empty())
4085 return SDValue(); // This is a dead node, leave it alone.
4086
4087 unsigned Opcode = Node->getOpcode();
4088 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4089 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4090
4091 // DivMod lib calls can still work on non-legal types if using lib-calls.
4092 EVT VT = Node->getValueType(0);
4093 if (VT.isVector() || !VT.isInteger())
4094 return SDValue();
4095
4096 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4097 return SDValue();
4098
4099 // If DIVREM is going to get expanded into a libcall,
4100 // but there is no libcall available, then don't combine.
4101 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4102 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4103 return SDValue();
4104
4105 // If div is legal, it's better to do the normal expansion
4106 unsigned OtherOpcode = 0;
4107 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4108 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4109 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4110 return SDValue();
4111 } else {
4112 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
// NOTE(review): the extracted listing drops source line 4113 here —
// presumably the symmetric legality check on the rem opcode
// (if (TLI.isOperationLegalOrCustom(Opcode, VT))). Verify against the
// full source.
4114 return SDValue();
4115 }
4116
// Walk every user of the dividend looking for a matching div/rem/divrem on
// the same (Op0, Op1) pair; fold all of them onto one DIVREM node.
4117 SDValue Op0 = Node->getOperand(0);
4118 SDValue Op1 = Node->getOperand(1);
4119 SDValue combined;
4120 for (SDNode *User : Op0.getNode()->uses()) {
4121 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4122 User->use_empty())
4123 continue;
4124 // Convert the other matching node(s), too;
4125 // otherwise, the DIVREM may get target-legalized into something
4126 // target-specific that we won't be able to recognize.
4127 unsigned UserOpc = User->getOpcode();
4128 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4129 User->getOperand(0) == Op0 &&
4130 User->getOperand(1) == Op1) {
// Lazily create (or reuse) the DIVREM node the first time a convertible
// user is seen; the original opcode itself is skipped here and replaced
// via the caller's return value.
4131 if (!combined) {
4132 if (UserOpc == OtherOpcode) {
4133 SDVTList VTs = DAG.getVTList(VT, VT);
4134 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4135 } else if (UserOpc == DivRemOpc) {
4136 combined = SDValue(User, 0);
4137 } else {
4138 assert(UserOpc == Opcode);
4139 continue;
4140 }
4141 }
// Quotient is result 0 of the DIVREM, remainder is result 1.
4142 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4143 CombineTo(User, combined);
4144 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4145 CombineTo(User, combined.getValue(1));
4146 }
4147 }
4148 return combined;
4149}
4150
// Common simplifications for SDIV/UDIV/SREM/UREM that need no target queries.
// NOTE(review): the extracted listing drops source line 4151 (the function
// signature — presumably "static SDValue simplifyDivRem(SDNode *N,
// SelectionDAG &DAG) {") and lines 4159/4176 (presumably the
// isConstOrConstSplat declarations of N1C/N0C used below). Verify against
// the full source.
4152 SDValue N0 = N->getOperand(0);
4153 SDValue N1 = N->getOperand(1);
4154 EVT VT = N->getValueType(0);
4155 SDLoc DL(N);
4156
4157 unsigned Opc = N->getOpcode();
4158 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4160
4161 // X / undef -> undef
4162 // X % undef -> undef
4163 // X / 0 -> undef
4164 // X % 0 -> undef
4165 // NOTE: This includes vectors where any divisor element is zero/undef.
4166 if (DAG.isUndef(Opc, {N0, N1}))
4167 return DAG.getUNDEF(VT);
4168
4169 // undef / X -> 0
4170 // undef % X -> 0
4171 if (N0.isUndef())
4172 return DAG.getConstant(0, DL, VT);
4173
4174 // 0 / X -> 0
4175 // 0 % X -> 0
4177 if (N0C && N0C->isZero())
4178 return N0;
4179
4180 // X / X -> 1
4181 // X % X -> 0
4182 if (N0 == N1)
4183 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4184
4185 // X / 1 -> X
4186 // X % 1 -> 0
4187 // If this is a boolean op (single-bit element type), we can't have
4188 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4189 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4190 // it's a 1.
4191 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4192 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4193
4194 return SDValue();
4195}
4196
// Combine an ISD::SDIV node: constant folds, algebraic folds, strength
// reduction to UDIV, and conversion to SDIVREM when a matching SREM exists.
// NOTE(review): the extracted listing drops declaration lines 4201, 4210,
// 4214, 4227 and 4252 — presumably CCVT, FoldedVOp, N1C, NewSel and Attr,
// all used below. Verify against the full source.
4197 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4198 SDValue N0 = N->getOperand(0);
4199 SDValue N1 = N->getOperand(1);
4200 EVT VT = N->getValueType(0);
4202 SDLoc DL(N);
4203
4204 // fold (sdiv c1, c2) -> c1/c2
4205 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4206 return C;
4207
4208 // fold vector ops
4209 if (VT.isVector())
4211 return FoldedVOp;
4212
4213 // fold (sdiv X, -1) -> 0-X
4215 if (N1C && N1C->isAllOnes())
4216 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4217
// Only MIN_SIGNED itself divides to a nonzero (1) result by MIN_SIGNED.
4218 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4219 if (N1C && N1C->getAPIntValue().isMinSignedValue())
4220 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4221 DAG.getConstant(1, DL, VT),
4222 DAG.getConstant(0, DL, VT));
4223
4224 if (SDValue V = simplifyDivRem(N, DAG))
4225 return V;
4226
4228 return NewSel;
4229
4230 // If we know the sign bits of both operands are zero, strength reduce to a
4231 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4232 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4233 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4234
4235 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4236 // If the corresponding remainder node exists, update its users with
4237 // (Dividend - (Quotient * Divisor).
4238 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4239 { N0, N1 })) {
4240 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4241 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4242 AddToWorklist(Mul.getNode());
4243 AddToWorklist(Sub.getNode());
4244 CombineTo(RemNode, Sub);
4245 }
4246 return V;
4247 }
4248
4249 // sdiv, srem -> sdivrem
4250 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4251 // true. Otherwise, we break the simplification logic in visitREM().
4253 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4254 if (SDValue DivRem = useDivRem(N))
4255 return DivRem;
4256
4257 return SDValue();
4258}
4259
// Speculative SDIV lowering used by both visitSDIV and visitREM: divide by a
// (possibly negated) power of two via shifts, or build a magic-number
// sequence via BuildSDIV.
// NOTE(review): the extracted listing drops lines 4263, 4288-4289, 4293,
// 4315 and 4332-4333 — presumably the CCVT/ShiftAmtTy/Bits declarations,
// the Inexact-constant guard, the IsOneOrAllOnes OR, and the guard around
// BuildSDIV. Verify against the full source.
4260 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4261 SDLoc DL(N);
4262 EVT VT = N->getValueType(0);
4264 unsigned BitWidth = VT.getScalarSizeInBits();
4265
4266 // Helper for determining whether a value is a power-2 constant scalar or a
4267 // vector of such elements.
4268 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4269 if (C->isZero() || C->isOpaque())
4270 return false;
4271 if (C->getAPIntValue().isPowerOf2())
4272 return true;
4273 if (C->getAPIntValue().isNegatedPowerOf2())
4274 return true;
4275 return false;
4276 };
4277
4278 // fold (sdiv X, pow2) -> simple ops after legalize
4279 // FIXME: We check for the exact bit here because the generic lowering gives
4280 // better results in that case. The target-specific lowering should learn how
4281 // to handle exact sdivs efficiently.
4282 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4283 // Target-specific implementation of sdiv x, pow2.
4284 if (SDValue Res = BuildSDIVPow2(N))
4285 return Res;
4286
4287 // Create constants that are functions of the shift amount value.
// CTTZ of a power-of-two constant yields log2(divisor), i.e. the shift.
4290 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4291 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4292 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4294 return SDValue();
4295
4296 // Splat the sign bit into the register
4297 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4298 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4299 AddToWorklist(Sign.getNode());
4300
// Bias negative dividends so the arithmetic shift rounds toward zero.
4301 // Add (N0 < 0) ? abs2 - 1 : 0;
4302 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4303 AddToWorklist(Srl.getNode());
4304 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4305 AddToWorklist(Add.getNode());
4306 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4307 AddToWorklist(Sra.getNode());
4308
4309 // Special case: (sdiv X, 1) -> X
4310 // Special Case: (sdiv X, -1) -> 0-X
4311 SDValue One = DAG.getConstant(1, DL, VT);
4312 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4313 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4314 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4316 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4317
4318 // If dividing by a positive value, we're done. Otherwise, the result must
4319 // be negated.
4320 SDValue Zero = DAG.getConstant(0, DL, VT);
4321 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4322
4323 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4324 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4325 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4326 return Res;
4327 }
4328
4329 // If integer divide is expensive and we satisfy the requirements, emit an
4330 // alternate sequence. Targets may check function attributes for size/speed
4331 // trade-offs.
4334 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4335 if (SDValue Op = BuildSDIV(N))
4336 return Op;
4337
4338 return SDValue();
4339}
4340
// Combine an ISD::UDIV node: constant folds, algebraic folds, and conversion
// to UDIVREM when a matching UREM exists.
// NOTE(review): the extracted listing drops declaration lines 4345, 4354,
// 4358, 4367 and 4387 — presumably CCVT, FoldedVOp, N1C, NewSel and Attr,
// all used below. Verify against the full source.
4341 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4342 SDValue N0 = N->getOperand(0);
4343 SDValue N1 = N->getOperand(1);
4344 EVT VT = N->getValueType(0);
4346 SDLoc DL(N);
4347
4348 // fold (udiv c1, c2) -> c1/c2
4349 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4350 return C;
4351
4352 // fold vector ops
4353 if (VT.isVector())
4355 return FoldedVOp;
4356
// Only -1 itself divides (unsigned) by -1 to a nonzero (1) quotient.
4357 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4359 if (N1C && N1C->isAllOnes())
4360 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4361 DAG.getConstant(1, DL, VT),
4362 DAG.getConstant(0, DL, VT));
4363
4364 if (SDValue V = simplifyDivRem(N, DAG))
4365 return V;
4366
4368 return NewSel;
4369
4370 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4371 // If the corresponding remainder node exists, update its users with
4372 // (Dividend - (Quotient * Divisor).
4373 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4374 { N0, N1 })) {
4375 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4376 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4377 AddToWorklist(Mul.getNode());
4378 AddToWorklist(Sub.getNode());
4379 CombineTo(RemNode, Sub);
4380 }
4381 return V;
4382 }
4383
4384 // sdiv, srem -> sdivrem
4385 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4386 // true. Otherwise, we break the simplification logic in visitREM().
4388 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4389 if (SDValue DivRem = useDivRem(N))
4390 return DivRem;
4391
4392 return SDValue();
4393}
4394
// Speculative UDIV lowering used by both visitUDIV and visitREM: divide by a
// power of two (or a shifted power of two) via SRL, or build a magic-number
// sequence via BuildUDIV.
// NOTE(review): the extracted listing drops lines 4401-4402, 4405, 4415-4416
// and 4429-4430 — presumably the power-of-two guard plus LogBase2/ShiftVT
// declarations and the guard around BuildUDIV. Verify against the full
// source.
4395 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4396 SDLoc DL(N);
4397 EVT VT = N->getValueType(0);
4398
4399 // fold (udiv x, (1 << c)) -> x >>u c
4400 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4403 AddToWorklist(LogBase2.getNode());
4404
4406 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4407 AddToWorklist(Trunc.getNode());
4408 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4409 }
4410
4411 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4412 if (N1.getOpcode() == ISD::SHL) {
4413 SDValue N10 = N1.getOperand(0);
4414 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4417 AddToWorklist(LogBase2.getNode());
4418
// The add is done in the shift-amount type of the inner SHL.
4419 EVT ADDVT = N1.getOperand(1).getValueType();
4420 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4421 AddToWorklist(Trunc.getNode());
4422 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4423 AddToWorklist(Add.getNode());
4424 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4425 }
4426 }
4427
4428 // fold (udiv x, c) -> alternate
4431 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4432 if (SDValue Op = BuildUDIV(N))
4433 return Op;
4434
4435 return SDValue();
4436}
4437
// NOTE(review): the extracted listing drops lines 4444, 4450, 4462, 4473,
// 4481, 4488, 4498 and 4506 — presumably the CCVT/N1C/NewSel/NegOne/Attr/
// OptimizedDiv/Mul declarations used below. Verify against the full source.
4438 // handles ISD::SREM and ISD::UREM
4439 SDValue DAGCombiner::visitREM(SDNode *N) {
4440 unsigned Opcode = N->getOpcode();
4441 SDValue N0 = N->getOperand(0);
4442 SDValue N1 = N->getOperand(1);
4443 EVT VT = N->getValueType(0);
4445
4446 bool isSigned = (Opcode == ISD::SREM);
4447 SDLoc DL(N);
4448
4449 // fold (rem c1, c2) -> c1%c2
4451 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4452 return C;
4453
// x u< -1 leaves x unchanged; only x == -1 gives remainder 0.
4454 // fold (urem X, -1) -> select(X == -1, 0, x)
4455 if (!isSigned && N1C && N1C->isAllOnes())
4456 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4457 DAG.getConstant(0, DL, VT), N0);
4458
4459 if (SDValue V = simplifyDivRem(N, DAG))
4460 return V;
4461
4463 return NewSel;
4464
4465 if (isSigned) {
4466 // If we know the sign bits of both operands are zero, strength reduce to a
4467 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4468 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4469 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4470 } else {
4471 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4472 // fold (urem x, pow2) -> (and x, pow2-1)
4474 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4475 AddToWorklist(Add.getNode());
4476 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4477 }
4478 if (N1.getOpcode() == ISD::SHL &&
4479 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4480 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4482 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4483 AddToWorklist(Add.getNode());
4484 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4485 }
4486 }
4489
4490 // If X/C can be simplified by the division-by-constant logic, lower
4491 // X%C to the equivalent of X-X/C*C.
4492 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4493 // speculative DIV must not cause a DIVREM conversion. We guard against this
4494 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4495 // combine will not return a DIVREM. Regardless, checking cheapness here
4496 // makes sense since the simplification results in fatter code.
4497 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4499 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4500 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4501 // If the equivalent Div node also exists, update its users.
4502 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4503 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4504 { N0, N1 }))
4505 CombineTo(DivNode, OptimizedDiv);
4507 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4508 AddToWorklist(OptimizedDiv.getNode());
4509 AddToWorklist(Mul.getNode());
4510 return Sub;
4511 }
4512 }
4513
4514 // sdiv, srem -> sdivrem
4515 if (SDValue DivRem = useDivRem(N))
4516 return DivRem.getValue(1);
4517
4518 return SDValue();
4519}
4520
// Combine an ISD::MULHS (signed multiply-high) node.
// NOTE(review): the extracted listing drops lines 4532-4533, 4537, 4542,
// 4553-4554, 4566, 4569 and 4572 — presumably the constant-canonicalization
// condition, FoldedVOp, the all-zeros-splat check, the shift-amount constant,
// NewVT, the second SIGN_EXTEND and the SimpleSize shift constant. Verify
// against the full source.
4521 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4522 SDValue N0 = N->getOperand(0);
4523 SDValue N1 = N->getOperand(1);
4524 EVT VT = N->getValueType(0);
4525 SDLoc DL(N);
4526
4527 // fold (mulhs c1, c2)
4528 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4529 return C;
4530
4531 // canonicalize constant to RHS.
4534 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4535
4536 if (VT.isVector()) {
4538 return FoldedVOp;
4539
4540 // fold (mulhs x, 0) -> 0
4541 // do not return N1, because undef node may exist.
4543 return DAG.getConstant(0, DL, VT);
4544 }
4545
4546 // fold (mulhs x, 0) -> 0
4547 if (isNullConstant(N1))
4548 return N1;
4549
// High half of x*1 is just the sign extension of x.
4550 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4551 if (isOneConstant(N1))
4552 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4555
4556 // fold (mulhs x, undef) -> 0
4557 if (N0.isUndef() || N1.isUndef())
4558 return DAG.getConstant(0, DL, VT);
4559
4560 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4561 // plus a shift.
4562 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4563 !VT.isVector()) {
4564 MVT Simple = VT.getSimpleVT();
4565 unsigned SimpleSize = Simple.getSizeInBits();
4567 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4568 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4570 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
// Shift the double-width product down to recover the high half.
4571 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4573 getShiftAmountTy(N1.getValueType())));
4574 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4575 }
4576 }
4577
4578 return SDValue();
4579}
4580
// Combine an ISD::MULHU (unsigned multiply-high) node.
// NOTE(review): the extracted listing drops lines 4592-4593, 4597, 4602,
// 4620, 4622, 4625, 4636, 4639, 4642 and 4651 — presumably the
// canonicalization condition, FoldedVOp, the zero-splat check, the
// power-of-two guard plus LogBase2/ShiftVT declarations, NewVT, the second
// ZERO_EXTEND, the shift constant, and the SimplifyDemandedBits call. Verify
// against the full source.
4581 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4582 SDValue N0 = N->getOperand(0);
4583 SDValue N1 = N->getOperand(1);
4584 EVT VT = N->getValueType(0);
4585 SDLoc DL(N);
4586
4587 // fold (mulhu c1, c2)
4588 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4589 return C;
4590
4591 // canonicalize constant to RHS.
4594 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4595
4596 if (VT.isVector()) {
4598 return FoldedVOp;
4599
4600 // fold (mulhu x, 0) -> 0
4601 // do not return N1, because undef node may exist.
4603 return DAG.getConstant(0, DL, VT);
4604 }
4605
4606 // fold (mulhu x, 0) -> 0
4607 if (isNullConstant(N1))
4608 return N1;
4609
// Unsigned high half of x*1 is always zero.
4610 // fold (mulhu x, 1) -> 0
4611 if (isOneConstant(N1))
4612 return DAG.getConstant(0, DL, N0.getValueType());
4613
4614 // fold (mulhu x, undef) -> 0
4615 if (N0.isUndef() || N1.isUndef())
4616 return DAG.getConstant(0, DL, VT);
4617
4618 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4619 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4621 unsigned NumEltBits = VT.getScalarSizeInBits();
4623 SDValue SRLAmt = DAG.getNode(
4624 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4626 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4627 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4628 }
4629
4630 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4631 // plus a shift.
4632 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4633 !VT.isVector()) {
4634 MVT Simple = VT.getSimpleVT();
4635 unsigned SimpleSize = Simple.getSizeInBits();
4637 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4638 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4640 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4641 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4643 getShiftAmountTy(N1.getValueType())));
4644 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4645 }
4646 }
4647
4648 // Simplify the operands using demanded-bits information.
4649 // We don't have demanded bits support for MULHU so this just enables constant
4650 // folding based on known bits.
4652 return SDValue(N, 0);
4653
4654 return SDValue();
4655}
4656
4657/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4658/// give the opcodes for the two computations that are being performed. Return
4659/// true if a simplification was made.
4660SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4661 unsigned HiOp) {
4662 // If the high half is not needed, just compute the low half.
4663 bool HiExists = N->hasAnyUseOfValue(1);
4664 if (!HiExists && (!LegalOperations ||
4665 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4666 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4667 return CombineTo(N, Res, Res);
4668 }
4669
4670 // If the low half is not needed, just compute the high half.
4671 bool LoExists = N->hasAnyUseOfValue(0);
4672 if (!LoExists && (!LegalOperations ||
4673 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4674 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4675 return CombineTo(N, Res, Res);
4676 }
4677
4678 // If both halves are used, return as it is.
4679 if (LoExists && HiExists)
4680 return SDValue();
4681
4682 // If the two computed results can be simplified separately, separate them.
4683 if (LoExists) {
4684 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4685 AddToWorklist(Lo.getNode());
4686 SDValue LoOpt = combine(Lo.getNode());
4687 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4688 (!LegalOperations ||
4689 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4690 return CombineTo(N, LoOpt, LoOpt);
4691 }
4692
4693 if (HiExists) {
4694 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4695 AddToWorklist(Hi.getNode());
4696 SDValue HiOpt = combine(Hi.getNode());
4697 if (HiOpt.getNode() && HiOpt != Hi &&
4698 (!LegalOperations ||
4699 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4700 return CombineTo(N, HiOpt, HiOpt);
4701 }
4702
4703 return SDValue();
4704}
4705
// Combine an ISD::SMUL_LOHI node: drop the unused half, or widen to a legal
// double-width MUL plus a shift.
// NOTE(review): the extracted listing drops lines 4707 and 4718 — presumably
// the SimplifyNodeWithTwoResults call guarding the early "return Res" and
// the NewVT declaration. Verify against the full source.
4706 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4708 return Res;
4709
4710 EVT VT = N->getValueType(0);
4711 SDLoc DL(N);
4712
4713 // If the type is twice as wide is legal, transform the mulhu to a wider
4714 // multiply plus a shift.
4715 if (VT.isSimple() && !VT.isVector()) {
4716 MVT Simple = VT.getSimpleVT();
4717 unsigned SimpleSize = Simple.getSizeInBits();
4719 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4720 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4721 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4722 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4723 // Compute the high part as N1.
4724 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4726 getShiftAmountTy(Lo.getValueType())));
4727 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4728 // Compute the low part as N0.
4729 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4730 return CombineTo(N, Lo, Hi);
4731 }
4732 }
4733
4734 return SDValue();
4735}
4736
// Combine an ISD::UMUL_LOHI node: drop the unused half, fold trivial
// multiplicands, or widen to a legal double-width MUL plus a shift.
// NOTE(review): the extracted listing drops lines 4738, 4761 and 4768 —
// presumably the SimplifyNodeWithTwoResults call, the NewVT declaration and
// the SimpleSize shift constant. Verify against the full source.
4737 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4739 return Res;
4740
4741 EVT VT = N->getValueType(0);
4742 SDLoc DL(N);
4743
4744 // (umul_lohi N0, 0) -> (0, 0)
4745 if (isNullConstant(N->getOperand(1))) {
4746 SDValue Zero = DAG.getConstant(0, DL, VT);
4747 return CombineTo(N, Zero, Zero);
4748 }
4749
4750 // (umul_lohi N0, 1) -> (N0, 0)
4751 if (isOneConstant(N->getOperand(1))) {
4752 SDValue Zero = DAG.getConstant(0, DL, VT);
4753 return CombineTo(N, N->getOperand(0), Zero);
4754 }
4755
4756 // If the type is twice as wide is legal, transform the mulhu to a wider
4757 // multiply plus a shift.
4758 if (VT.isSimple() && !VT.isVector()) {
4759 MVT Simple = VT.getSimpleVT();
4760 unsigned SimpleSize = Simple.getSizeInBits();
4762 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4763 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4764 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4765 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4766 // Compute the high part as N1.
4767 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4769 getShiftAmountTy(Lo.getValueType())));
4770 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4771 // Compute the low part as N0.
4772 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4773 return CombineTo(N, Lo, Hi);
4774 }
4775 }
4776
4777 return SDValue();
4778}
4779
// Combine ISD::SMULO / ISD::UMULO (multiply with overflow flag).
// NOTE(review): the extracted listing drops lines 4789-4790, 4801,
// 4805-4806, 4833 and 4838-4839 — presumably the N0C/N1C declarations, the
// overflow-flag constant of the constant fold, the canonicalization
// condition, the addition of N1's sign bits, and the KnownBits declarations.
// Verify against the full source.
4780 SDValue DAGCombiner::visitMULO(SDNode *N) {
4781 SDValue N0 = N->getOperand(0);
4782 SDValue N1 = N->getOperand(1);
4783 EVT VT = N0.getValueType();
4784 bool IsSigned = (ISD::SMULO == N->getOpcode());
4785
4786 EVT CarryVT = N->getValueType(1);
4787 SDLoc DL(N);
4788
4791
4792 // fold operation with constant operands.
4793 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4794 // multiple results.
4795 if (N0C && N1C) {
4796 bool Overflow;
4797 APInt Result =
4798 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4799 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4800 return CombineTo(N, DAG.getConstant(Result, DL, VT),
4802 }
4803
4804 // canonicalize constant to RHS.
4807 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4808
4809 // fold (mulo x, 0) -> 0 + no carry out
4810 if (isNullOrNullSplat(N1))
4811 return CombineTo(N, DAG.getConstant(0, DL, VT),
4812 DAG.getConstant(0, DL, CarryVT));
4813
4814 // (mulo x, 2) -> (addo x, x)
4815 if (N1C && N1C->getAPIntValue() == 2)
4816 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4817 N->getVTList(), N0, N0);
4818
4819 if (IsSigned) {
// i1 signed multiply: (-1)*(-1) = +1 is not representable in i1.
4820 // A 1 bit SMULO overflows if both inputs are 1.
4821 if (VT.getScalarSizeInBits() == 1) {
4822 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4823 return CombineTo(N, And,
4824 DAG.getSetCC(DL, CarryVT, And,
4825 DAG.getConstant(0, DL, VT), ISD::SETNE));
4826 }
4827
4828 // Multiplying n * m significant bits yields a result of n + m significant
4829 // bits. If the total number of significant bits does not exceed the
4830 // result bit width (minus 1), there is no overflow.
4831 unsigned SignBits = DAG.ComputeNumSignBits(N0);
4832 if (SignBits > 1)
4834 if (SignBits > VT.getScalarSizeInBits() + 1)
4835 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4836 DAG.getConstant(0, DL, CarryVT));
4837 } else {
// Unsigned: prove no overflow from the known maximum operand values.
4840 bool Overflow;
4841 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4842 if (!Overflow)
4843 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4844 DAG.getConstant(0, DL, CarryVT));
4845 }
4846
4847 return SDValue();
4848}
4849
// NOTE(review): the extracted listing drops line 4856 (the function
// signature — presumably "static SDValue isSaturatingMinMax(SDValue N0,
// SDValue N1, SDValue N2,") as well as lines 4866-4867, 4884, 4890, 4907 and
// 4917-4918 — presumably the N1C/N3C declarations, the N0CC declaration, its
// SMIN/SMAX and SETCC initializations, and the MinCOp/MaxCOp declarations.
// Verify against the full source.
4850 // Function to calculate whether the Min/Max pair of SDNodes (potentially
4851 // swapped around) make a signed saturate pattern, clamping to between a signed
4852 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
4853 // Returns the node being clamped and the bitwidth of the clamp in BW. Should
4854 // work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
4855 // same as SimplifySelectCC. N0<N1 ? N2 : N3.
4857 SDValue N3, ISD::CondCode CC, unsigned &BW,
4858 bool &Unsigned) {
4859 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
4860 ISD::CondCode CC) {
4861 // The compare and select operand should be the same or the select operands
4862 // should be truncated versions of the comparison.
4863 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
4864 return 0;
4865 // The constants need to be the same or a truncated version of each other.
4868 if (!N1C || !N3C)
4869 return 0;
4870 const APInt &C1 = N1C->getAPIntValue();
4871 const APInt &C2 = N3C->getAPIntValue();
4872 if (C1.getBitWidth() < C2.getBitWidth() ||
4873 C1 != C2.sextOrSelf(C1.getBitWidth()))
4874 return 0;
4875 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
4876 };
4877
4878 // Check the initial value is a SMIN/SMAX equivalent.
4879 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
4880 if (!Opcode0)
4881 return SDValue();
4882
// Decompose the inner node into select-cc-shaped operands (N00<N01?N02:N03)
// so the same isSignedMinMax helper can classify it.
4883 SDValue N00, N01, N02, N03;
4885 switch (N0.getOpcode()) {
4886 case ISD::SMIN:
4887 case ISD::SMAX:
4888 N00 = N02 = N0.getOperand(0);
4889 N01 = N03 = N0.getOperand(1);
4891 break;
4892 case ISD::SELECT_CC:
4893 N00 = N0.getOperand(0);
4894 N01 = N0.getOperand(1);
4895 N02 = N0.getOperand(2);
4896 N03 = N0.getOperand(3);
4897 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
4898 break;
4899 case ISD::SELECT:
4900 case ISD::VSELECT:
4901 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
4902 return SDValue();
4903 N00 = N0.getOperand(0).getOperand(0);
4904 N01 = N0.getOperand(0).getOperand(1);
4905 N02 = N0.getOperand(1);
4906 N03 = N0.getOperand(2);
4908 break;
4909 default:
4910 return SDValue();
4911 }
4912
// A saturate needs one SMIN and one SMAX (in either order).
4913 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
4914 if (!Opcode1 || Opcode0 == Opcode1)
4915 return SDValue();
4916
4919 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
4920 return SDValue();
4921
4922 const APInt &MinC = MinCOp->getAPIntValue();
4923 const APInt &MaxC = MaxCOp->getAPIntValue();
4924 APInt MinCPlus1 = MinC + 1;
// Signed saturate: clamp to [-2^(BW-1), 2^(BW-1)-1].
4925 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
4926 BW = MinCPlus1.exactLogBase2() + 1;
4927 Unsigned = false;
4928 return N02;
4929 }
4930
// Unsigned saturate: clamp to [0, 2^BW - 1].
4931 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
4932 BW = MinCPlus1.exactLogBase2();
4933 Unsigned = true;
4934 return N02;
4935 }
4936
4937 return SDValue();
4938}
4939
// Fold a saturating min/max clamp around FP_TO_SINT into a single
// FP_TO_SINT_SAT / FP_TO_UINT_SAT node.
// NOTE(review): the extracted listing drops line 4940 (the signature —
// presumably "static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue
// N1, SDValue N2,") and lines 4949, 4951 and 4953-4954 — presumably the
// NewVT construction (scalar then vector) and the NewOpc selection plus a
// TLI profitability guard. Verify against the full source.
4941 SDValue N3, ISD::CondCode CC,
4942 SelectionDAG &DAG) {
4943 unsigned BW;
4944 bool Unsigned;
4945 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
4946 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
4947 return SDValue();
4948 EVT FPVT = Fp.getOperand(0).getValueType();
4950 if (FPVT.isVector())
4952 FPVT.getVectorElementCount());
4955 return SDValue();
4956 SDLoc DL(Fp);
// The saturation width is carried as a VALUETYPE operand of the SAT node.
4957 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
4958 DAG.getValueType(NewVT.getScalarType()));
4959 return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
4960 : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
4961}
4962
4964 SDValue N3, ISD::CondCode CC,
4965 SelectionDAG &DAG) {
4966 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
4967 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
4968 // be truncated versions of the the setcc (N0/N1).
4969 if ((N0 != N2 &&
4970 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
4971 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
4972 return SDValue();
4975 if (!N1C || !N3C)
4976 return SDValue();
4977 const APInt &C1 = N1C->getAPIntValue();
4978 const APInt &C3 = N3C->getAPIntValue();
4979 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
4980 C1 != C3.zextOrSelf(C1.getBitWidth()))
4981 return SDValue();
4982
4983 unsigned BW = (C1 + 1).exactLogBase2();
4984 EVT FPVT = N0.getOperand(0).getValueType();
4986 if (FPVT.isVector())
4988 FPVT.getVectorElementCount());
4990 FPVT, NewVT))
4991 return SDValue();
4992
4993 SDValue Sat =
4995 DAG.getValueType(NewVT.getScalarType()));
4996 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
4997}
4998
// Combine integer min/max nodes (SMIN/SMAX/UMIN/UMAX): constant-fold,
// canonicalize, flip signedness when sign bits are known zero, and try the
// FP-to-int saturation combines.
 4999SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
 5000  SDValue N0 = N->getOperand(0);
 5001  SDValue N1 = N->getOperand(1);
 5002  EVT VT = N0.getValueType();
 5003  unsigned Opcode = N->getOpcode();
 5004  SDLoc DL(N);
 5005
 5006  // fold operation with constant operands.
 5007  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
 5008    return C;
 5009
 5010  // canonicalize constant to RHS
// NOTE(review): the constant-operand test guarding this swap is on elided
// lines.
 5013    return DAG.getNode(Opcode, DL, VT, N1, N0);
 5014
 5015  // fold vector ops
 5016  if (VT.isVector())
 5018      return FoldedVOp;
 5019
 5020  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
 5021  // Only do this if the current op isn't legal and the flipped is.
 5022  if (!TLI.isOperationLegal(Opcode, VT) &&
 5023      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
 5024      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
 5025    unsigned AltOpcode;
 5026    switch (Opcode) {
 5027    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
 5028    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
 5029    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
 5030    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
 5031    default: llvm_unreachable("Unknown MINMAX opcode");
 5032    }
 5033    if (TLI.isOperationLegal(AltOpcode, VT))
 5034      return DAG.getNode(AltOpcode, DL, VT, N0, N1);
 5035  }
 5036
// Try to turn min/max of an FP->int conversion into a saturating conversion.
// SMIN uses SETLT and SMAX uses SETGT to describe the implied comparison.
 5037  if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
 5039            N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
 5040      return S;
 5041  if (Opcode == ISD::UMIN)
 5042    if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
 5043      return S;
 5044
 5045  // Simplify the operands using demanded-bits information.
 5047    return SDValue(N, 0);
 5048
 5049  return SDValue();
 5050}
5051
 5052/// If this is a bitwise logic instruction and both operands have the same
 5053/// opcode, try to sink the other opcode after the logic instruction.
 5054SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
 5055  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
 5056  EVT VT = N0.getValueType();
 5057  unsigned LogicOpcode = N->getOpcode();
 5058  unsigned HandOpcode = N0.getOpcode();
// Both hands must share the same opcode (asserted below); the logic op is
// AND/OR/XOR (first assert spans an elided line).
 5060          LogicOpcode == ISD::XOR) && "Expected logic opcode");
 5061  assert(HandOpcode == N1.getOpcode() && "Bad input!");
 5062
 5063  // Bail early if none of these transforms apply.
 5064  if (N0.getNumOperands() == 0)
 5065    return SDValue();
 5066
 5067  // FIXME: We should check number of uses of the operands to not increase
 5068  // the instruction count for all transforms.
 5069
 5070  // Handle size-changing casts.
 5071  SDValue X = N0.getOperand(0);
 5072  SDValue Y = N1.getOperand(0);
 5073  EVT XVT = X.getValueType();
 5074  SDLoc DL(N);
// NOTE(review): the cast-opcode test opening this block (e.g. matching
// ZERO_EXTEND/SIGN_EXTEND/ANY_EXTEND-style hands) is on elided lines.
 5077    // If both operands have other uses, this transform would create extra
 5078    // instructions without eliminating anything.
 5079    if (!N0.hasOneUse() && !N1.hasOneUse())
 5080      return SDValue();
 5081    // We need matching integer source types.
 5082    if (XVT != Y.getValueType())
 5083      return SDValue();
 5084    // Don't create an illegal op during or after legalization. Don't ever
 5085    // create an unsupported vector op.
 5086    if ((VT.isVector() || LegalOperations) &&
 5088      return SDValue();
 5089    // Avoid infinite looping with PromoteIntBinOp.
 5090    // TODO: Should we apply desirable/legal constraints to all opcodes?
 5091    if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
 5093      return SDValue();
 5094    // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
 5095    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
 5096    return DAG.getNode(HandOpcode, DL, VT, Logic);
 5097  }
 5098
 5099  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
 5100  if (HandOpcode == ISD::TRUNCATE) {
 5101    // If both operands have other uses, this transform would create extra
 5102    // instructions without eliminating anything.
 5103    if (!N0.hasOneUse() && !N1.hasOneUse())
 5104      return SDValue();
 5105    // We need matching source types.
 5106    if (XVT != Y.getValueType())
 5107      return SDValue();
 5108    // Don't create an illegal op during or after legalization.
 5109    if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
 5110      return SDValue();
 5111    // Be extra careful sinking truncate. If it's free, there's no benefit in
 5112    // widening a binop. Also, don't create a logic op on an illegal type.
 5113    if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
 5114      return SDValue();
 5115    if (!TLI.isTypeLegal(XVT))
 5116      return SDValue();
 5117    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
 5118    return DAG.getNode(HandOpcode, DL, VT, Logic);
 5119  }
 5120
 5121  // For binops SHL/SRL/SRA/AND:
 5122  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
// Requires the second operand (shift amount / mask) to be identical on both
// hands so it can be factored out.
 5123  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
 5125      N0.getOperand(1) == N1.getOperand(1)) {
 5126    // If either operand has other uses, this transform is not an improvement.
 5127    if (!N0.hasOneUse() || !N1.hasOneUse())
 5128      return SDValue();
 5129    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
 5130    return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
 5131  }
 5132
 5133  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
 5134  if (HandOpcode == ISD::BSWAP) {
 5135    // If either operand has other uses, this transform is not an improvement.
 5136    if (!N0.hasOneUse() || !N1.hasOneUse())
 5137      return SDValue();
 5138    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
 5139    return DAG.getNode(HandOpcode, DL, VT, Logic);
 5140  }
 5141
 5142  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
 5143  // Only perform this optimization up until type legalization, before
 5144  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
 5145  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
 5146  // we don't want to undo this promotion.
 5147  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
 5148  // on scalars.
// NOTE(review): the opcode check (BITCAST/SCALAR_TO_VECTOR) shares an elided
// line with this level guard.
 5150      Level <= AfterLegalizeTypes) {
 5151    // Input types must be integer and the same.
 5152    if (XVT.isInteger() && XVT == Y.getValueType() &&
 5153        !(VT.isVector() && TLI.isTypeLegal(VT) &&
 5154          !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
 5155      SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
 5156      return DAG.getNode(HandOpcode, DL, VT, Logic);
 5157    }
 5158  }
 5159
 5160  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
 5161  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
 5162  // If both shuffles use the same mask, and both shuffle within a single
 5163  // vector, then it is worthwhile to move the swizzle after the operation.
 5164  // The type-legalizer generates this pattern when loading illegal
 5165  // vector types from memory. In many cases this allows additional shuffle
 5166  // optimizations.
 5167  // There are other cases where moving the shuffle after the xor/and/or
 5168  // is profitable even if shuffles don't perform a swizzle.
 5169  // If both shuffles use the same mask, and both shuffles have the same first
 5170  // or second operand, then it might still be profitable to move the shuffle
 5171  // after the xor/and/or operation.
 5173    auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
 5175    assert(X.getValueType() == Y.getValueType() &&
 5176           "Inputs to shuffles are not the same type");
 5177
 5178    // Check that both shuffles use the same mask. The masks are known to be of
 5179    // the same length because the result vector type is the same.
 5180    // Check also that shuffles have only one use to avoid introducing extra
 5181    // instructions.
 5182    if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
 5183        !SVN0->getMask().equals(SVN1->getMask()))
 5184      return SDValue();
 5185
 5186    // Don't try to fold this node if it requires introducing a
 5187    // build vector of all zeros that might be illegal at this stage.
 5188    SDValue ShOp = N0.getOperand(1);
// XOR needs a zero second operand after the transform, so materialize one
// only if legal; ShOp becoming null cancels the fold below.
 5189    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
 5190      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
 5191
 5192    // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
 5193    if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
 5194      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
 5195                                  N0.getOperand(0), N1.getOperand(0));
 5196      return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
 5197    }
 5198
 5199    // Don't try to fold this node if it requires introducing a
 5200    // build vector of all zeros that might be illegal at this stage.
 5201    ShOp = N0.getOperand(0);
 5202    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
 5203      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
 5204
 5205    // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
 5206    if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
 5207      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
 5208                                  N1.getOperand(1));
 5209      return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
 5210    }
 5211  }
 5212
 5213  return SDValue();
 5214}
5215
 5216/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
 5217SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 5218                                       const SDLoc &DL) {
// Decompose both operands into setcc-equivalent (LHS, RHS, predicate)
// triples; bail if either is not comparable.
 5219  SDValue LL, LR, RL, RR, N0CC, N1CC;
 5220  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
 5221      !isSetCCEquivalent(N1, RL, RR, N1CC))
 5222    return SDValue();
 5223
 5224  assert(N0.getValueType() == N1.getValueType() &&
 5225         "Unexpected operand types for bitwise logic op");
 5226  assert(LL.getValueType() == LR.getValueType() &&
 5227         RL.getValueType() == RR.getValueType() &&
 5228         "Unexpected operand types for setcc");
 5229
 5230  // If we're here post-legalization or the logic op type is not i1, the logic
 5231  // op type must match a setcc result type. Also, all folds require new
 5232  // operations on the left and right operands, so those types must match.
 5233  EVT VT = N0.getValueType();
 5234  EVT OpVT = LL.getValueType();
 5235  if (LegalOperations || VT.getScalarType() != MVT::i1)
 5236    if (VT != getSetCCResultType(OpVT))
 5237      return SDValue();
 5238  if (OpVT != RL.getValueType())
 5239    return SDValue();
 5240
// NOTE(review): CC0/CC1 are extracted from N0CC/N1CC on elided lines.
 5243  bool IsInteger = OpVT.isInteger();
 5244  if (LR == RR && CC0 == CC1 && IsInteger) {
 5245    bool IsZero = isNullOrNullSplat(LR);
// IsNeg1 (all-ones splat test) is computed on an elided line.
 5247
 5248    // All bits clear?
 5249    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
 5250    // All sign bits clear?
 5251    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
 5252    // Any bits set?
 5253    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
 5254    // Any sign bits set?
 5255    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
 5256
 5257    // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
 5258    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
 5259    // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
 5260    // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
 5261    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
 5262      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
 5263      AddToWorklist(Or.getNode());
 5264      return DAG.getSetCC(DL, VT, Or, LR, CC1);
 5265    }
 5266
 5267    // All bits set?
 5268    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
 5269    // All sign bits set?
 5270    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
 5271    // Any bits clear?
 5272    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
 5273    // Any sign bits clear?
 5274    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
 5275
 5276    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
 5277    // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
 5278    // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
 5279    // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
 5280    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
 5281      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
 5282      AddToWorklist(And.getNode());
 5283      return DAG.getSetCC(DL, VT, And, LR, CC1);
 5284    }
 5285  }
 5286
 5287  // TODO: What is the 'or' equivalent of this fold?
 5288  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
 5289  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
 5290      IsInteger && CC0 == ISD::SETNE &&
 5291      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
 5292       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
 5293    SDValue One = DAG.getConstant(1, DL, OpVT);
 5294    SDValue Two = DAG.getConstant(2, DL, OpVT);
 5295    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
 5296    AddToWorklist(Add.getNode());
 5297    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
 5298  }
 5299
 5300  // Try more general transforms if the predicates match and the only user of
 5301  // the compares is the 'and' or 'or'.
// NOTE(review): part of this guard (predicate equality / integer check) is on
// an elided line.
 5303      N0.hasOneUse() && N1.hasOneUse()) {
 5304    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
 5305    // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
 5306    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
 5307      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
 5308      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
 5309      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
 5310      SDValue Zero = DAG.getConstant(0, DL, OpVT);
 5311      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
 5312    }
 5313
 5314    // Turn compare of constants whose difference is 1 bit into add+and+setcc.
 5315    // TODO - support non-uniform vector amounts.
 5316    if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
 5317      // Match a shared variable operand and 2 non-opaque constant operands.
// C0/C1 (splat constants of LR/RR) are extracted on elided lines.
 5320      if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
 5321        const APInt &CMax =
 5322            APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
 5323        const APInt &CMin =
 5324            APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
 5325        // The difference of the constants must be a single bit.
 5326        if ((CMax - CMin).isPowerOf2()) {
 5327          // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
 5328          // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
 5329          SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
 5330          SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
 5331          SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
 5332          SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
 5333          SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
 5334          SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
 5335          SDValue Zero = DAG.getConstant(0, DL, OpVT);
 5336          return DAG.getSetCC(DL, VT, And, Zero, CC0);
 5337        }
 5338      }
 5339    }
 5340  }
 5341
 5342  // Canonicalize equivalent operands to LL == RL.
// The matching predicate swap for CC1 happens on an elided line.
 5343  if (LL == RR && LR == RL) {
 5345    std::swap(RL, RR);
 5346  }
 5347
 5348  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
 5349  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
 5350  if (LL == RL && LR == RR) {
// Combine the two predicates into a single one when the target allows it.
 5352                               : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
 5353    if (NewCC != ISD::SETCC_INVALID &&
 5354        (!LegalOperations ||
 5355         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
 5357      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
 5358  }
 5359
 5360  return SDValue();
 5361}
5362
 5363/// This contains all DAGCombine rules which reduce two values combined by
 5364/// an And operation to a single value. This makes them reusable in the context
 5365/// of visitSELECT(). Rules involving constants are not included as
 5366/// visitSELECT() already handles those cases.
 5367SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
 5368  EVT VT = N1.getValueType();
 5369  SDLoc DL(N);
 5370
 5371  // fold (and x, undef) -> 0
 5372  if (N0.isUndef() || N1.isUndef())
 5373    return DAG.getConstant(0, DL, VT);
 5374
 5375  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
 5376    return V;
 5377
 5378  // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
 5379  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
 5380      VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
// ADDI (the add's constant operand) is extracted on an elided line.
 5382    if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
 5383      // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
 5384      // immediate for an add, but it is legal if its top c2 bits are set,
 5385      // transform the ADD so the immediate doesn't need to be materialized
 5386      // in a register.
 5387      APInt ADDC = ADDI->getAPIntValue();
 5388      APInt SRLC = SRLI->getAPIntValue();
 5389      if (ADDC.getMinSignedBits() <= 64 &&
 5390          SRLC.ult(VT.getSizeInBits()) &&
 5391          !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
// Mask (high SRLC bits) is built on an elided line; those bits are masked
// off by the shift anyway, so they may be freely set in the add immediate.
 5393                                          SRLC.getZExtValue());
 5394        if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
 5395          ADDC |= Mask;
 5396          if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
 5397            SDLoc DL0(N0);
 5398            SDValue NewAdd =
 5399              DAG.getNode(ISD::ADD, DL0, VT,
 5400                          N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
 5401            CombineTo(N0.getNode(), NewAdd);
 5402            // Return N so it doesn't get rechecked!
 5403            return SDValue(N, 0);
 5404          }
 5405        }
 5406      }
 5407    }
 5408  }
 5409
 5410  // Reduce bit extract of low half of an integer to the narrower type.
 5411  // (and (srl i64:x, K), KMask) ->
 5412  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
 5413  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
// CAnd/CShift (the mask and shift constants) come from elided lines.
 5417      unsigned Size = VT.getSizeInBits();
 5418      const APInt &AndMask = CAnd->getAPIntValue();
 5419      unsigned ShiftBits = CShift->getZExtValue();
 5420
 5421      // Bail out, this node will probably disappear anyway.
 5422      if (ShiftBits == 0)
 5423        return SDValue();
 5424
 5425      unsigned MaskBits = AndMask.countTrailingOnes();
// HalfVT is constructed on an elided line (integer type of Size / 2 bits).
 5427
 5428      if (AndMask.isMask() &&
 5429          // Required bits must not span the two halves of the integer and
 5430          // must fit in the half size type.
 5431          (ShiftBits + MaskBits <= Size / 2) &&
 5432          TLI.isNarrowingProfitable(VT, HalfVT) &&
 5435          TLI.isTruncateFree(VT, HalfVT) &&
 5436          TLI.isZExtFree(HalfVT, VT)) {
 5437        // The isNarrowingProfitable is to avoid regressions on PPC and
 5438        // AArch64 which match a few 64-bit bit insert / bit extract patterns
 5439        // on downstream users of this. Those patterns could probably be
 5440        // extended to handle extensions mixed in.
 5441
 5442        SDValue SL(N0);
 5443        assert(MaskBits <= Size);
 5444
 5445        // Extracting the highest bit of the low half.
 5447        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
 5448                                    N0.getOperand(0));
 5449
// Rebuild the shift/mask in the narrow type, then zero-extend back.
 5450        SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
 5452        SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
 5453        SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
 5454        return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
 5455      }
 5456    }
 5457  }
 5458
 5459  return SDValue();
 5460}
5462
// Return true if (and (load), AndC) can be implemented as a zero-extending
// load of a narrower type. The trailing signature line (LoadResultTy/ExtVT)
// is elided in this view; ExtVT is set to the narrow type to load.
 5463bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
// The mask must be a low-bit mask (contiguous ones from bit 0).
 5465  if (!AndC->getAPIntValue().isMask())
 5466    return false;
 5467
 5468  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
// ExtVT is computed from ActiveBits on an elided line.
 5470
 5471  EVT LoadedVT = LoadN->getMemoryVT();
 5472
 5473  if (ExtVT == LoadedVT &&
 5474      (!LegalOperations ||
// Remainder of the ZEXTLOAD-legality check is on an elided line.
 5476    // ZEXTLOAD will match without needing to change the size of the value being
 5477    // loaded.
 5478    return true;
 5479  }
 5480
 5481  // Do not change the width of a volatile or atomic loads.
 5482  if (!LoadN->isSimple())
 5483    return false;
 5484
 5485  // Do not generate loads of non-round integer types since these can
 5486  // be expensive (and would be wrong if the type is not byte sized).
 5487  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
 5488    return false;
 5489
// Post-legalization, the narrowed extending load must itself be legal.
 5490  if (LegalOperations &&
 5492    return false;
 5493
// Final target hook (shouldReduceLoadWidth) spans an elided line.
 5495    return false;
 5496
 5497  return true;
 5498}
5499
// Return true if the load/store LDST can legally be narrowed to MemVT,
// reading/writing at byte offset ShAmt/8 from the original access.
 5500bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
 5501                                    ISD::LoadExtType ExtType, EVT &MemVT,
 5502                                    unsigned ShAmt) {
 5503  if (!LDST)
 5504    return false;
 5505  // Only allow byte offsets.
 5506  if (ShAmt % 8)
 5507    return false;
 5508
 5509  // Do not generate loads of non-round integer types since these can
 5510  // be expensive (and would be wrong if the type is not byte sized).
 5511  if (!MemVT.isRound())
 5512    return false;
 5513
 5514  // Don't change the width of a volatile or atomic loads.
 5515  if (!LDST->isSimple())
 5516    return false;
 5517
 5518  EVT LdStMemVT = LDST->getMemoryVT();
 5519
 5520  // Bail out when changing the scalable property, since we can't be sure that
 5521  // we're actually narrowing here.
 5522  if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
 5523    return false;
 5524
 5525  // Verify that we are actually reducing a load width here.
 5526  if (LdStMemVT.bitsLT(MemVT))
 5527    return false;
 5528
 5529  // Ensure that this isn't going to produce an unsupported memory access.
 5530  if (ShAmt) {
 5531    assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
 5532    const unsigned ByteShAmt = ShAmt / 8;
 5533    const Align LDSTAlign = LDST->getAlign();
// NarrowAlign (alignment of the offset access) is computed on an elided line.
 5535    if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
 5536                                LDST->getAddressSpace(), NarrowAlign,
 5537                                LDST->getMemOperand()->getFlags()))
 5538      return false;
 5539  }
 5540
 5541  // It's not possible to generate a constant of extended or untyped type.
 5542  EVT PtrType = LDST->getBasePtr().getValueType();
 5543  if (PtrType == MVT::Untyped || PtrType.isExtended())
 5544    return false;
 5545
 5546  if (isa<LoadSDNode>(LDST)) {
// Load (the cast) is defined on an elided line.
 5548    // Don't transform one with multiple uses, this would require adding a new
 5549    // load.
 5550    if (!SDValue(Load, 0).hasOneUse())
 5551      return false;
 5552
 5553    if (LegalOperations &&
 5554        !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
 5555      return false;
 5556
 5557    // For the transform to be legal, the load must produce only two values
 5558    // (the value loaded and the chain). Don't transform a pre-increment
 5559    // load, for example, which produces an extra value. Otherwise the
 5560    // transformation is not equivalent, and the downstream logic to replace
 5561    // uses gets things wrong.
 5562    if (Load->getNumValues() > 2)
 5563      return false;
 5564
 5565    // If the load that we're shrinking is an extload and we're not just
 5566    // discarding the extension we can't simply shrink the load. Bail.
 5567    // TODO: It would be possible to merge the extensions in some cases.
 5568    if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
 5569        Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
 5570      return false;
 5571
 5572    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
 5573      return false;
 5574  } else {
 5575    assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
// Store (the cast) is defined on an elided line.
 5577    // Can't write outside the original store
 5578    if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
 5579      return false;
 5580
 5581    if (LegalOperations &&
 5582        !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
 5583      return false;
 5584  }
 5585  return true;
 5586}
5587
// Recursively walk the operands of N collecting loads that can be narrowed
// under Mask (into Loads), logic nodes whose constants need re-masking (into
// NodesWithConsts), and at most one other node that must be masked explicitly
// (NodeToMask). The Loads/NodesWithConsts parameter lines are elided in this
// view. Returns false if any operand makes the mask propagation unsafe.
 5588bool DAGCombiner::SearchForAndLoads(SDNode *N,
 5591                                    ConstantSDNode *Mask,
 5592                                    SDNode *&NodeToMask) {
 5593  // Recursively search for the operands, looking for loads which can be
 5594  // narrowed.
 5595  for (SDValue Op : N->op_values()) {
// Only scalar integer dataflow is handled.
 5596    if (Op.getValueType().isVector())
 5597      return false;
 5598
 5599    // Some constants may need fixing up later if they are too large.
 5600    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
 5601      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
 5602          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
 5603        NodesWithConsts.insert(N);
 5604      continue;
 5605    }
 5606
 5607    if (!Op.hasOneUse())
 5608      return false;
 5609
 5610    switch(Op.getOpcode()) {
 5611    case ISD::LOAD: {
 5612      auto *Load = cast<LoadSDNode>(Op);
 5613      EVT ExtVT;
// The second half of this condition (isLegalNarrowLdSt) is on an elided line.
 5614      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
 5616
 5617        // ZEXTLOAD is already small enough.
 5618        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
 5619            ExtVT.bitsGE(Load->getMemoryVT()))
 5620          continue;
 5621
 5622        // Use LE to convert equal sized loads to zext.
 5623        if (ExtVT.bitsLE(Load->getMemoryVT()))
 5624          Loads.push_back(Load);
 5625
 5626        continue;
 5627      }
 5628      return false;
 5629    }
 5630    case ISD::ZERO_EXTEND:
 5631    case ISD::AssertZext: {
 5632      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
// ExtVT (integer type of ActiveBits bits) is built on an elided line.
 5634      EVT VT = Op.getOpcode() == ISD::AssertZext ?
 5635        cast<VTSDNode>(Op.getOperand(1))->getVT() :
 5636        Op.getOperand(0).getValueType();
 5637
 5638      // We can accept extending nodes if the mask is wider or an equal
 5639      // width to the original type.
 5640      if (ExtVT.bitsGE(VT))
 5641        continue;
 5642      break;
 5643    }
 5644    case ISD::OR:
 5645    case ISD::XOR:
 5646    case ISD::AND:
// Recurse through bitwise logic; the same mask applies to all operands.
 5647      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
 5648                             NodeToMask))
 5649        return false;
 5650      continue;
 5651    }
 5652
 5653    // Allow one node which will masked along with any loads found.
 5654    if (NodeToMask)
 5655      return false;
 5656
 5657    // Also ensure that the node to be masked only produces one data result.
 5658    NodeToMask = Op.getNode();
 5659    if (NodeToMask->getNumValues() > 1) {
 5660      bool HasValue = false;
 5661      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
// VT (the i-th result type) is read on an elided line; glue/chain results
// don't count as data results.
 5663        if (VT != MVT::Glue && VT != MVT::Other) {
 5664          if (HasValue) {
 5665            NodeToMask = nullptr;
 5666            return false;
 5667          }
 5668          HasValue = true;
 5669        }
 5670      }
 5671      assert(HasValue && "Node to be masked has no data result?");
 5672    }
 5673  }
 5674  return true;
 5675}
5676
// Given an AND node N with a low-bit-mask constant RHS, push the mask back
// through the feeding logic tree so the loads at the leaves can be narrowed.
// Returns true (and rewrites the DAG in place) on success.
 5677bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
 5678  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
 5679  if (!Mask)
 5680    return false;
 5681
 5682  if (!Mask->getAPIntValue().isMask())
 5683    return false;
 5684
 5685  // No need to do anything if the and directly uses a load.
 5686  if (isa<LoadSDNode>(N->getOperand(0)))
 5687    return false;
 5688
// Loads / NodesWithConsts containers are declared on elided lines.
 5691  SDNode *FixupNode = nullptr;
 5692  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
// Without at least one narrowable load the rewrite gains nothing.
 5693    if (Loads.size() == 0)
 5694      return false;
 5695
 5696    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
 5697    SDValue MaskOp = N->getOperand(1);
 5698
 5699    // If it exists, fixup the single node we allow in the tree that needs
 5700    // masking.
 5701    if (FixupNode) {
 5702      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
// The AND creation and use-replacement span elided lines; if CSE folded the
// new AND back onto itself, restore its operands afterwards.
 5704                                FixupNode->getValueType(0),
 5705                                SDValue(FixupNode, 0), MaskOp);
 5707      if (And.getOpcode() == ISD ::AND)
 5708        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
 5709    }
 5710
 5711    // Narrow any constants that need it.
 5712    for (auto *LogicN : NodesWithConsts) {
 5713      SDValue Op0 = LogicN->getOperand(0);
 5714      SDValue Op1 = LogicN->getOperand(1);
 5715
// Canonicalize so the constant ends up in Op1 before masking it.
 5716      if (isa<ConstantSDNode>(Op0))
 5717        std::swap(Op0, Op1);
 5718
 5719      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
 5720                                Op1, MaskOp);
 5721
 5722      DAG.UpdateNodeOperands(LogicN, Op0, And);
 5723    }
 5724
 5725    // Create narrow loads.
 5726    for (auto *Load : Loads) {
 5727      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
 5728      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
 5729                                SDValue(Load, 0), MaskOp);
 5730      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
 5731      if (And.getOpcode() == ISD ::AND)
 5732        And = SDValue(
 5733            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
 5734      SDValue NewLoad = reduceLoadWidth(And.getNode());
 5735      assert(NewLoad &&
 5736             "Shouldn't be masking the load if it can't be narrowed");
 5737      CombineTo(Load, NewLoad, NewLoad.getValue(1));
 5738    }
// The original AND is now redundant: every leaf is already masked.
 5739    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
 5740    return true;
 5741  }
 5742  return false;
 5743}
5744
 5745// Unfold
 5746// x & (-1 'logical shift' y)
 5747// To
 5748// (x 'opposite logical shift' y) 'logical shift' y
 5749// if it is better for performance.
 5750SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
 5751  assert(N->getOpcode() == ISD::AND);
 5752
 5753  SDValue N0 = N->getOperand(0);
 5754  SDValue N1 = N->getOperand(1);
 5755
 5756  // Do we actually prefer shifts over mask?
// NOTE(review): the TLI preference query guarding this bail-out is on an
// elided line.
 5758    return SDValue();
 5759
 5760  // Try to match (-1 '[outer] logical shift' y)
 5761  unsigned OuterShift;
 5762  unsigned InnerShift; // The opposite direction to the OuterShift.
 5763  SDValue Y;           // Shift amount.
 5764  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
 5765    if (!M.hasOneUse())
 5766      return false;
 5767    OuterShift = M->getOpcode();
// InnerShift assignments (SRL for SHL and vice versa) are on elided lines.
 5768    if (OuterShift == ISD::SHL)
 5770    else if (OuterShift == ISD::SRL)
 5772    else
 5773      return false;
 5774    if (!isAllOnesConstant(M->getOperand(0)))
 5775      return false;
 5776    Y = M->getOperand(1);
 5777    return true;
 5778  };
 5779
// Either operand of the AND may hold the shifted all-ones mask.
 5780  SDValue X;
 5781  if (matchMask(N1))
 5782    X = N0;
 5783  else if (matchMask(N0))
 5784    X = N1;
 5785  else
 5786    return SDValue();
 5787
 5788  SDLoc DL(N);
 5789  EVT VT = N->getValueType(0);
 5790
 5791  // tmp = x 'opposite logical shift' y
 5792  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
 5793  // ret = tmp 'logical shift' y
 5794  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
 5795
 5796  return T1;
 5797}
5798
 5799/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
 5800/// For a target with a bit test, this is expected to become test + set and save
 5801/// at least 1 instruction.
// NOTE(review): the function signature line (taking the AND node and DAG) is
// elided in this view.
 5803  assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
 5804
 5805  // This is probably not worthwhile without a supported type.
 5806  EVT VT = And->getValueType(0);
 5807  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 5808  if (!TLI.isTypeLegal(VT))
 5809    return SDValue();
 5810
 5811  // Look through an optional extension and find a 'not'.
 5812  // TODO: Should we favor test+set even without the 'not' op?
 5813  SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
 5814  if (Not.getOpcode() == ISD::ANY_EXTEND)
 5815    Not = Not.getOperand(0);
 5816  if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
 5817    return SDValue();
 5818
 5819  // Look though an optional truncation. The source operand may not be the same
 5820  // type as the original 'and', but that is ok because we are masking off
 5821  // everything but the low bit.
 5822  SDValue Srl = Not.getOperand(0);
 5823  if (Srl.getOpcode() == ISD::TRUNCATE)
 5824    Srl = Srl.getOperand(0);
 5825
 5826  // Match a shift-right by constant.
 5827  if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
 5828      !isa<ConstantSDNode>(Srl.getOperand(1)))
 5829    return SDValue();
 5830
 5831  // We might have looked through casts that make this transform invalid.
 5832  // TODO: If the source type is wider than the result type, do the mask and
 5833  // compare in the source type.
 5834  const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
 5835  unsigned VTBitWidth = VT.getSizeInBits();
 5836  if (ShiftAmt.uge(VTBitWidth))
 5837    return SDValue();
 5838
 5839  // Turn this into a bit-test pattern using mask op + setcc:
 5840  // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
 5841  SDLoc DL(And);
 5842  SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
 5843  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 5844  SDValue Mask = DAG.getConstant(
 5845      APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
 5846  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
 5847  SDValue Zero = DAG.getConstant(0, DL, VT);
 5848  SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
// The setcc result type may differ from VT, so resize the i1-ish result.
 5849  return DAG.getZExtOrTrunc(Setcc, DL, VT);
 5850}
5851
 5852/// For targets that support usubsat, match a bit-hack form of that operation
 5853/// that ends in 'and' and convert it.
// NOTE(review): the function signature line (taking the AND node and DAG) is
// elided in this view.
 5855  SDValue N0 = N->getOperand(0);
 5856  SDValue N1 = N->getOperand(1);
 5857  EVT VT = N1.getValueType();
 5858
 5859  // Canonicalize SRA as operand 1.
 5860  if (N0.getOpcode() == ISD::SRA)
 5861    std::swap(N0, N1);
 5862
 5863  // xor/add with SMIN (signmask) are logically equivalent.
 5864  if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
 5865    return SDValue();
 5866
// Both ops must be single-use and share the same source value X.
 5867  if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
 5868      N0.getOperand(0) != N1.getOperand(0))
 5869    return SDValue();
 5870
 5871  unsigned BitWidth = VT.getScalarSizeInBits();
// XorC is extracted on an elided line; the xor/add constant must be the sign
// mask and the arithmetic shift must replicate the sign bit (BitWidth - 1).
 5873  ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
 5874  if (!XorC || !XorC->getAPIntValue().isSignMask() ||
 5875      !SraC || SraC->getAPIntValue() != BitWidth - 1)
 5876    return SDValue();
 5877
 5878  // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
 5879  // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
 5880  SDLoc DL(N);
 5881  SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
 5882  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
 5883}
5884
/// Visit an ISD::AND node: constant folding, canonicalization of a constant
/// operand to the RHS, vector-specific folds, conversion of extending /
/// masked loads and gathers into zero-extending forms, and assorted
/// mask-based simplifications. Returns the replacement value, or an empty
/// SDValue if no combine applied.
/// NOTE(review): this extraction elides several original source lines (the
/// embedded line numbers jump), so some statements below reference values
/// (e.g. N1C, MLoad, BVec, Load, Constant, ExtLoad) whose declarations are
/// not visible here — consult the full file before editing.
5885SDValue DAGCombiner::visitAND(SDNode *N) {
5886  SDValue N0 = N->getOperand(0);
5887  SDValue N1 = N->getOperand(1);
5888  EVT VT = N1.getValueType();
5889
5890  // x & x --> x
5891  if (N0 == N1)
5892    return N0;
5893
5894  // fold (and c1, c2) -> c1&c2
5895  if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5896    return C;
5897
5898  // canonicalize constant to RHS
5901    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5902
5903  // fold vector ops
5904  if (VT.isVector()) {
5906      return FoldedVOp;
5907
5908    // fold (and x, 0) -> 0, vector edition
5910      // do not return N1, because undef node may exist in N1
5911      return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
5912                             SDLoc(N), N1.getValueType());
5913
5914    // fold (and x, -1) -> x, vector edition
5916      return N0;
5917
5918    // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5921    if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5922        N0.hasOneUse() && N1.hasOneUse()) {
5923      EVT LoadVT = MLoad->getMemoryVT();
5924      EVT ExtVT = VT;
5926        // For this AND to be a zero extension of the masked load the elements
5927        // of the BuildVec must mask the bottom bits of the extended element
5928        // type
5929        if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5930          uint64_t ElementSize =
5931              LoadVT.getVectorElementType().getScalarSizeInBits();
5932          if (Splat->getAPIntValue().isMask(ElementSize)) {
5933            return DAG.getMaskedLoad(
5934                ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5935                MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5936                LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5937                ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5938          }
5939        }
5940      }
5941    }
5942  }
5943
5944  // fold (and x, -1) -> x
5945  if (isAllOnesConstant(N1))
5946    return N0;
5947
5948  // if (and x, c) is known to be zero, return 0
5949  unsigned BitWidth = VT.getScalarSizeInBits();
5952    return DAG.getConstant(0, SDLoc(N), VT);
5953
5955    return NewSel;
5956
5957  // reassociate and
5958  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5959    return RAND;
5960
5961  // Try to convert a constant mask AND into a shuffle clear mask.
5962  if (VT.isVector())
5963    if (SDValue Shuffle = XformToShuffleWithZero(N))
5964      return Shuffle;
5965
5966  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
5967    return Combined;
5968
5969  // fold (and (or x, C), D) -> D if (C & D) == D
// NOTE(review): the declaration of the subset-check lambda (orig. line 5970)
// is elided here; only its body and use are visible.
5971    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5972  };
5973  if (N0.getOpcode() == ISD::OR &&
5975    return N1;
5976  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5977  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5978    SDValue N0Op0 = N0.getOperand(0);
5979    APInt Mask = ~N1C->getAPIntValue();
5980    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5981    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5983                                 N0.getValueType(), N0Op0);
5984
5985      // Replace uses of the AND with uses of the Zero extend node.
5986      CombineTo(N, Zext);
5987
5988      // We actually want to replace all uses of the any_extend with the
5989      // zero_extend, to avoid duplicating things. This will later cause this
5990      // AND to be folded.
5991      CombineTo(N0.getNode(), Zext);
5992      return SDValue(N, 0); // Return N so it doesn't get rechecked!
5993    }
5994  }
5995
5996  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5997  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5998  // already be zero by virtue of the width of the base type of the load.
5999  //
6000  // the 'X' node here can either be nothing or an extract_vector_elt to catch
6001  // more cases.
6002  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6004       N0.getOperand(0).getOpcode() == ISD::LOAD &&
6005       N0.getOperand(0).getResNo() == 0) ||
6006      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6008                                             N0 : N0.getOperand(0) );
6009
6010    // Get the constant (if applicable) the zero'th operand is being ANDed with.
6011    // This can be a pure constant or a vector splat, in which case we treat the
6012    // vector as a scalar and use the splat value.
6015      Constant = C->getAPIntValue();
6017      APInt SplatValue, SplatUndef;
6018      unsigned SplatBitSize;
6019      bool HasAnyUndefs;
6020      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
6021                                             SplatBitSize, HasAnyUndefs);
6022      if (IsSplat) {
6023        // Undef bits can contribute to a possible optimisation if set, so
6024        // set them.
6025        SplatValue |= SplatUndef;
6026
6027        // The splat value may be something like "0x00FFFFFF", which means 0 for
6028        // the first vector value and FF for the rest, repeating. We need a mask
6029        // that will apply equally to all members of the vector, so AND all the
6030        // lanes of the constant together.
6031        unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6032
6033        // If the splat value has been compressed to a bitlength lower
6034        // than the size of the vector lane, we need to re-expand it to
6035        // the lane size.
6036        if (EltBitWidth > SplatBitSize)
6037          for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
6038               SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
6039            SplatValue |= SplatValue.shl(SplatBitSize);
6040
6041        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6042        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
6043        if ((SplatBitSize % EltBitWidth) == 0) {
6045          for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
6046            Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
6047        }
6048      }
6049    }
6050
6051    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
6052    // actually legal and isn't going to get expanded, else this is a false
6053    // optimisation.
6055                                                    Load->getValueType(0),
6056                                                    Load->getMemoryVT());
6057
6058    // Resize the constant to the same size as the original memory access before
6059    // extension. If it is still the AllOnesValue then this AND is completely
6060    // unneeded.
6061    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
6062
6063    bool B;
6064    switch (Load->getExtensionType()) {
6065    default: B = false; break;
6066    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
6067    case ISD::ZEXTLOAD:
6068    case ISD::NON_EXTLOAD: B = true; break;
6069    }
6070
6071    if (B && Constant.isAllOnes()) {
6072      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
6073      // preserve semantics once we get rid of the AND.
6074      SDValue NewLoad(Load, 0);
6075
6076      // Fold the AND away. NewLoad may get replaced immediately.
6077      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
6078
6079      if (Load->getExtensionType() == ISD::EXTLOAD) {
6080        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
6081                              Load->getValueType(0), SDLoc(Load),
6082                              Load->getChain(), Load->getBasePtr(),
6083                              Load->getOffset(), Load->getMemoryVT(),
6084                              Load->getMemOperand());
6085        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
6086        if (Load->getNumValues() == 3) {
6087          // PRE/POST_INC loads have 3 values.
6088          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
6089                           NewLoad.getValue(2) };
6090          CombineTo(Load, To, 3, true);
6091        } else {
6092          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
6093        }
6094      }
6095
6096      return SDValue(N, 0); // Return N so it doesn't get rechecked!
6097    }
6098  }
6099
6100  // fold (and (masked_gather x)) -> (zext_masked_gather x)
6101  if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
6102    EVT MemVT = GN0->getMemoryVT();
6103    EVT ScalarVT = MemVT.getScalarType();
6104
6105    if (SDValue(GN0, 0).hasOneUse() &&
6108      SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
6109                       GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
6110
6112          DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
6113          GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
6114
6115      CombineTo(N, ZExtLoad);
6116      AddToWorklist(ZExtLoad.getNode());
6117      // Avoid recheck of N.
6118      return SDValue(N, 0);
6119    }
6120  }
6121
6122  // fold (and (load x), 255) -> (zextload x, i8)
6123  // fold (and (extload x, i16), 255) -> (zextload x, i8)
6124  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
6125  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
6126                                (N0.getOpcode() == ISD::ANY_EXTEND &&
6127                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
6128    if (SDValue Res = reduceLoadWidth(N)) {
6131      AddToWorklist(N);
6133      return SDValue(N, 0);
6134    }
6135  }
6136
6137  if (LegalTypes) {
6138    // Attempt to propagate the AND back up to the leaves which, if they're
6139    // loads, can be combined to narrow loads and the AND node can be removed.
6140    // Perform after legalization so that extend nodes will already be
6141    // combined into the loads.
6143      return SDValue(N, 0);
6144  }
6145
6146  if (SDValue Combined = visitANDLike(N0, N1, N))
6147    return Combined;
6148
6149  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
6150  if (N0.getOpcode() == N1.getOpcode())
6152      return V;
6153
6154  // Masking the negated extension of a boolean is just the zero-extended
6155  // boolean:
6156  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
6157  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
6158  //
6159  // Note: the SimplifyDemandedBits fold below can make an information-losing
6160  // transform, and then we have no way to find this better fold.
6161  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
6162    if (isNullOrNullSplat(N0.getOperand(0))) {
6163      SDValue SubRHS = N0.getOperand(1);
6164      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
6165          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6166        return SubRHS;
6167      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
6168          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6169        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
6170    }
6171  }
6172
6173  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
6174  // fold (and (sra)) -> (and (srl)) when possible.
6176    return SDValue(N, 0);
6177
6178  // fold (zext_inreg (extload x)) -> (zextload x)
6179  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
6180  if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
6181      (ISD::isEXTLoad(N0.getNode()) ||
6182       (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
6184    EVT MemVT = LN0->getMemoryVT();
6185    // If we zero all the possible extended bits, then we can turn this into
6186    // a zextload if we are running before legalize or the operation is legal.
6187    unsigned ExtBitSize = N1.getScalarValueSizeInBits();
6188    unsigned MemBitSize = MemVT.getScalarSizeInBits();
6190    if (DAG.MaskedValueIsZero(N1, ExtBits) &&
6191        ((!LegalOperations && LN0->isSimple()) ||
6192         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
6194          DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
6195                         LN0->getBasePtr(), MemVT, LN0->getMemOperand());
6196      AddToWorklist(N);
6197      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6198      return SDValue(N, 0); // Return N so it doesn't get rechecked!
6199    }
6200  }
6201
6202  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
6203  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
6205                                          N0.getOperand(1), false))
6206      return BSwap;
6207  }
6208
6210    return Shifts;
6211
6212  if (TLI.hasBitTest(N0, N1))
6213    if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
6214      return V;
6215
6216  // Recognize the following pattern:
6217  //
6218  // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
6219  //
6220  // where bitmask is a mask that clears the upper bits of AndVT. The
6221  // number of bits in bitmask must be a power of two.
6222  auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
6223    if (LHS->getOpcode() != ISD::SIGN_EXTEND)
6224      return false;
6225
6226    auto *C = dyn_cast<ConstantSDNode>(RHS);
6227    if (!C)
6228      return false;
6229
6230    if (!C->getAPIntValue().isMask(
6231            LHS.getOperand(0).getValueType().getFixedSizeInBits()))
6232      return false;
6233
6234    return true;
6235  };
6236
6237  // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
6238  if (IsAndZeroExtMask(N0, N1))
6239    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
6240
6241  if (hasOperation(ISD::USUBSAT, VT))
6242    if (SDValue V = foldAndToUsubsat(N, DAG))
6243      return V;
6244
6245  return SDValue();
6246}
6247
6248/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
// Attempts to recognize a low-halfword byte swap built from SHL/SRL-by-8
// (optionally wrapped in 0xFF/0xFF00/0xFFFF AND masks) and rewrite it as a
// BSWAP, shifted right when the type is wider than 16 bits. Returns the
// replacement value, or an empty SDValue if the pattern does not match.
// NOTE(review): declarations of the mask constants (N01C, N11C, N001C,
// N101C — orig. lines 6270/6283/6297-6298/6309/6320) are elided in this
// extraction; only their uses are visible below.
6249SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
6250                                        bool DemandHighBits) {
6251  if (!LegalOperations)
6252    return SDValue();
6253
6254  EVT VT = N->getValueType(0);
6255  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
6256    return SDValue();
6258    return SDValue();
6259
6260  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6261  bool LookPassAnd0 = false;
6262  bool LookPassAnd1 = false;
6263  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6264    std::swap(N0, N1);
6265  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6266    std::swap(N0, N1);
6267  if (N0.getOpcode() == ISD::AND) {
6268    if (!N0.getNode()->hasOneUse())
6269      return SDValue();
6271    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6272    // This is needed for X86.
6273    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6274                  N01C->getZExtValue() != 0xFFFF))
6275      return SDValue();
6276    N0 = N0.getOperand(0);
6277    LookPassAnd0 = true;
6278  }
6279
6280  if (N1.getOpcode() == ISD::AND) {
6281    if (!N1.getNode()->hasOneUse())
6282      return SDValue();
6284    if (!N11C || N11C->getZExtValue() != 0xFF)
6285      return SDValue();
6286    N1 = N1.getOperand(0);
6287    LookPassAnd1 = true;
6288  }
6289
6290  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6291    std::swap(N0, N1);
6292  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6293    return SDValue();
6294  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
6295    return SDValue();
6296
6299  if (!N01C || !N11C)
6300    return SDValue();
6301  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6302    return SDValue();
6303
6304  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6305  SDValue N00 = N0->getOperand(0);
6306  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6307    if (!N00.getNode()->hasOneUse())
6308      return SDValue();
6310    if (!N001C || N001C->getZExtValue() != 0xFF)
6311      return SDValue();
6312    N00 = N00.getOperand(0);
6313    LookPassAnd0 = true;
6314  }
6315
6316  SDValue N10 = N1->getOperand(0);
6317  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6318    if (!N10.getNode()->hasOneUse())
6319      return SDValue();
6321    // Also allow 0xFFFF since the bits will be shifted out. This is needed
6322    // for X86.
6323    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6324                   N101C->getZExtValue() != 0xFFFF))
6325      return SDValue();
6326    N10 = N10.getOperand(0);
6327    LookPassAnd1 = true;
6328  }
6329
6330  if (N00 != N10)
6331    return SDValue();
6332
6333  // Make sure everything beyond the low halfword gets set to zero since the SRL
6334  // 16 will clear the top bits.
6335  unsigned OpSizeInBits = VT.getSizeInBits();
6336  if (DemandHighBits && OpSizeInBits > 16) {
6337    // If the left-shift isn't masked out then the only way this is a bswap is
6338    // if all bits beyond the low 8 are 0. In that case the entire pattern
6339    // reduces to a left shift anyway: leave it for other parts of the combiner.
6340    if (!LookPassAnd0)
6341      return SDValue();
6342
6343    // However, if the right shift isn't masked out then it might be because
6344    // it's not needed. See if we can spot that too.
6345    if (!LookPassAnd1 &&
6346        !DAG.MaskedValueIsZero(
6348      return SDValue();
6349  }
6350
6351  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6352  if (OpSizeInBits > 16) {
6353    SDLoc DL(N);
6354    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6355                      DAG.getConstant(OpSizeInBits - 16, DL,
6356                                      getShiftAmountTy(VT)));
6357  }
6358  return Res;
6359}
6360
6361/// Return true if the specified node is an element that makes up a 32-bit
6362/// packed halfword byteswap.
6363/// ((x & 0x000000ff) << 8) |
6364/// ((x & 0x0000ff00) >> 8) |
6365/// ((x & 0x00ff0000) << 8) |
6366/// ((x & 0xff000000) >> 8)
6368 if (!N.getNode()->hasOneUse())
6369 return false;
6370
6371 unsigned Opc = N.getOpcode();
6372 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6373 return false;
6374
6375 SDValue N0 = N.getOperand(0);
6376 unsigned Opc0 = N0.getOpcode();
6377 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6378 return false;
6379
6380 ConstantSDNode *N1C = nullptr;
6381 // SHL or SRL: look upstream for AND mask operand
6382 if (Opc == ISD::AND)
6383 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6384 else if (Opc0 == ISD::AND)
6386 if (!N1C)
6387 return false;
6388
6389 unsigned MaskByteOffset;
6390 switch (N1C->getZExtValue()) {
6391 default:
6392 return false;
6393 case 0xFF: MaskByteOffset = 0; break;
6394 case 0xFF00: MaskByteOffset = 1; break;
6395 case 0xFFFF:
6396 // In case demanded bits didn't clear the bits that will be shifted out.
6397 // This is needed for X86.
6398 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6399 MaskByteOffset = 1;
6400 break;
6401 }
6402 return false;
6403 case 0xFF0000: MaskByteOffset = 2; break;
6404 case 0xFF000000: MaskByteOffset = 3; break;
6405 }
6406
6407 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6408 if (Opc == ISD::AND) {
6409 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6410 // (x >> 8) & 0xff
6411 // (x >> 8) & 0xff0000
6412 if (Opc0 != ISD::SRL)
6413 return false;
6415 if (!C || C->getZExtValue() != 8)
6416 return false;
6417 } else {
6418 // (x << 8) & 0xff00
6419 // (x << 8) & 0xff000000
6420 if (Opc0 != ISD::SHL)
6421 return false;
6423 if (!C || C->getZExtValue() != 8)
6424 return false;
6425 }
6426 } else if (Opc == ISD::SHL) {
6427 // (x & 0xff) << 8
6428 // (x & 0xff0000) << 8
6429 if (MaskByteOffset != 0 && MaskByteOffset != 2)
6430 return false;
6431 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6432 if (!C || C->getZExtValue() != 8)
6433 return false;
6434 } else { // Opc == ISD::SRL
6435 // (x & 0xff00) >> 8
6436 // (x & 0xff000000) >> 8
6437 if (MaskByteOffset != 1 && MaskByteOffset != 3)
6438 return false;
6439 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6440 if (!C || C->getZExtValue() != 8)
6441 return false;
6442 }
6443
6444 if (Parts[MaskByteOffset])
6445 return false;
6446
6447 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6448 return true;
6449}
6450
6451// Match 2 elements of a packed halfword bswap.
6453 if (N.getOpcode() == ISD::OR)
6454 return isBSwapHWordElement(N.getOperand(0), Parts) &&
6455 isBSwapHWordElement(N.getOperand(1), Parts);
6456
6457 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6458 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6459 if (!C || C->getAPIntValue() != 16)
6460 return false;
6461 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6462 return true;
6463 }
6464
6465 return false;
6466}
6467
6468// Match this pattern:
6469// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6470// And rewrite this to:
6471// (rotr (bswap A), 16)
6473 SelectionDAG &DAG, SDNode *N, SDValue N0,
6475 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6476 "MatchBSwapHWordOrAndAnd: expecting i32");
6477 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6478 return SDValue();
6479 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6480 return SDValue();
6481 // TODO: this is too restrictive; lifting this restriction requires more tests
6482 if (!N0->hasOneUse() || !N1->hasOneUse())
6483 return SDValue();
6485 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6486 if (!Mask0 || !Mask1)
6487 return SDValue();
6488 if (Mask0->getAPIntValue() != 0xff00ff00 ||
6489 Mask1->getAPIntValue() != 0x00ff00ff)
6490 return SDValue();
6491 SDValue Shift0 = N0.getOperand(0);
6492 SDValue Shift1 = N1.getOperand(0);
6493 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6494 return SDValue();
6497 if (!ShiftAmt0 || !ShiftAmt1)
6498 return SDValue();
6499 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6500 return SDValue();
6501 if (Shift0.getOperand(0) != Shift1.getOperand(0))
6502 return SDValue();
6503
6504 SDLoc DL(N);
6505 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6506 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6507 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6508}
6509
6510/// Match a 32-bit packed halfword bswap. That is
6511/// ((x & 0x000000ff) << 8) |
6512/// ((x & 0x0000ff00) >> 8) |
6513/// ((x & 0x00ff0000) << 8) |
6514/// ((x & 0xff000000) >> 8)
6515/// => (rotl (bswap x), 16)
// Only runs after legalization and only for i32. First tries the
// and/and-of-shifts form via matchBSwapHWordOrAndAnd (both operand orders),
// then falls back to collecting the four byte "parts" with
// isBSwapHWordPair/isBSwapHWordElement and checking they all read the same
// source node.
// NOTE(review): the legality checks gating the ROTL/ROTR forms (orig. lines
// 6569/6571) are elided in this extraction.
6516SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6517  if (!LegalOperations)
6518    return SDValue();
6519
6520  EVT VT = N->getValueType(0);
6521  if (VT != MVT::i32)
6522    return SDValue();
6524    return SDValue();
6525
6526  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6527                                              getShiftAmountTy(VT)))
6528    return BSwap;
6529
6530  // Try again with commuted operands.
6531  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6532                                              getShiftAmountTy(VT)))
6533    return BSwap;
6534
6535
6536  // Look for either
6537  // (or (bswaphpair), (bswaphpair))
6538  // (or (or (bswaphpair), (and)), (and))
6539  // (or (or (and), (bswaphpair)), (and))
6540  SDNode *Parts[4] = {};
6541
6542  if (isBSwapHWordPair(N0, Parts)) {
6543    // (or (or (and), (and)), (or (and), (and)))
6544    if (!isBSwapHWordPair(N1, Parts))
6545      return SDValue();
6546  } else if (N0.getOpcode() == ISD::OR) {
6547    // (or (or (or (and), (and)), (and)), (and))
6548    if (!isBSwapHWordElement(N1, Parts))
6549      return SDValue();
6550    SDValue N00 = N0.getOperand(0);
6551    SDValue N01 = N0.getOperand(1);
6552    if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6553        !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6554      return SDValue();
6555  } else
6556    return SDValue();
6557
6558  // Make sure the parts are all coming from the same node.
6559  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6560    return SDValue();
6561
6562  SDLoc DL(N);
6563  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6564                              SDValue(Parts[0], 0));
6565
6566  // Result of the bswap should be rotated by 16. If it's not legal, then
6567  // do (x << 16) | (x >> 16).
6568  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6570    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6572    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6573  return DAG.getNode(ISD::OR, DL, VT,
6574                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6575                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6576}
6577
6578/// This contains all DAGCombine rules which reduce two values combined by
6579/// an Or operation to a single value \see visitANDLike().
// Returns the replacement value, or an empty SDValue if no rule applied.
// NOTE(review): the call fetching N0's mask constant (orig. line 6598,
// presumably getAsNonOpaqueConstant(N0.getOperand(1))) is elided in this
// extraction.
6580SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6581  EVT VT = N1.getValueType();
6582  SDLoc DL(N);
6583
6584  // fold (or x, undef) -> -1
6585  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6586    return DAG.getAllOnesConstant(DL, VT);
6587
6588  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6589    return V;
6590
6591  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6592  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6593      // Don't increase # computations.
6594      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6595    // We can only do this xform if we know that bits from X that are set in C2
6596    // but not in C1 are already zero. Likewise for Y.
6597    if (const ConstantSDNode *N0O1C =
6599      if (const ConstantSDNode *N1O1C =
6600              getAsNonOpaqueConstant(N1.getOperand(1))) {
6601        // We can only do this xform if we know that bits from X that are set in
6602        // C2 but not in C1 are already zero. Likewise for Y.
6603        const APInt &LHSMask = N0O1C->getAPIntValue();
6604        const APInt &RHSMask = N1O1C->getAPIntValue();
6605
6606        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6607            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6608          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6609                                  N0.getOperand(0), N1.getOperand(0));
6610          return DAG.getNode(ISD::AND, DL, VT, X,
6611                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
6612        }
6613      }
6614    }
6615  }
6616
6617  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6618  if (N0.getOpcode() == ISD::AND &&
6619      N1.getOpcode() == ISD::AND &&
6620      N0.getOperand(0) == N1.getOperand(0) &&
6621      // Don't increase # computations.
6622      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6623    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6624                            N0.getOperand(1), N1.getOperand(1));
6625    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6626  }
6627
6628  return SDValue();
6629}
6630
6631/// OR combines for which the commuted variant will be tried as well.
6633 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6634 EVT VT = N0.getValueType();
6635 if (N0.getOpcode() == ISD::AND) {
6636 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6637 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6638 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6639
6640 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6641 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6642 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6643 }
6644
6645 return SDValue();
6646}
6647
/// Visit an ISD::OR node: constant folding, canonicalization of a constant
/// operand to the RHS, vector shuffle merging, bswap/rotate/load-combine
/// pattern matching, and OR->ADD rewrites when the operands share no bits.
/// Returns the replacement value, or an empty SDValue if no combine applied.
/// NOTE(review): this extraction elides several original source lines (the
/// embedded line numbers jump), so some statements below reference values
/// (e.g. N1C, ZeroN00/ZeroN01, SV0/SV1, Mask, COR) whose declarations are
/// not visible here — consult the full file before editing.
6648SDValue DAGCombiner::visitOR(SDNode *N) {
6649  SDValue N0 = N->getOperand(0);
6650  SDValue N1 = N->getOperand(1);
6651  EVT VT = N1.getValueType();
6652
6653  // x | x --> x
6654  if (N0 == N1)
6655    return N0;
6656
6657  // fold (or c1, c2) -> c1|c2
6658  if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6659    return C;
6660
6661  // canonicalize constant to RHS
6664    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6665
6666  // fold vector ops
6667  if (VT.isVector()) {
6669      return FoldedVOp;
6670
6671    // fold (or x, 0) -> x, vector edition
6673      return N0;
6674
6675    // fold (or x, -1) -> -1, vector edition
6677      // do not return N1, because undef node may exist in N1
6678      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6679
6680    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6681    // Do this only if the resulting shuffle is legal.
6682    if (isa<ShuffleVectorSDNode>(N0) &&
6684        // Avoid folding a node with illegal type.
6685        TLI.isTypeLegal(VT)) {
6688      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6689      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6690      // Ensure both shuffles have a zero input.
6691      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6692        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6693        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6696        bool CanFold = true;
6697        int NumElts = VT.getVectorNumElements();
6699
6700        for (int i = 0; i != NumElts; ++i) {
6701          int M0 = SV0->getMaskElt(i);
6702          int M1 = SV1->getMaskElt(i);
6703
6704          // Determine if either index is pointing to a zero vector.
6705          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6706          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6707
6708          // If one element is zero and the otherside is undef, keep undef.
6709          // This also handles the case that both are undef.
6710          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6711            Mask[i] = -1;
6712            continue;
6713          }
6714
6715          // Make sure only one of the elements is zero.
6716          if (M0Zero == M1Zero) {
6717            CanFold = false;
6718            break;
6719          }
6720
6721          assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6722
6723          // We have a zero and non-zero element. If the non-zero came from
6724          // SV0 make the index a LHS index. If it came from SV1, make it
6725          // a RHS index. We need to mod by NumElts because we don't care
6726          // which operand it came from in the original shuffles.
6727          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6728        }
6729
6730        if (CanFold) {
6731          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6732          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6733
6736                                                  Mask, DAG);
6737          if (LegalShuffle)
6738            return LegalShuffle;
6739        }
6740      }
6741    }
6742  }
6743
6744  // fold (or x, 0) -> x
6745  if (isNullConstant(N1))
6746    return N0;
6747
6748  // fold (or x, -1) -> -1
6749  if (isAllOnesConstant(N1))
6750    return N1;
6751
6753    return NewSel;
6754
6755  // fold (or x, c) -> c iff (x & ~c) == 0
6757  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6758    return N1;
6759
6760  if (SDValue Combined = visitORLike(N0, N1, N))
6761    return Combined;
6762
6763  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
6764    return Combined;
6765
6766  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6767  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6768    return BSwap;
6769  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6770    return BSwap;
6771
6772  // reassociate or
6773  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6774    return ROR;
6775
6776  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6777  // iff (c1 & c2) != 0 or c1/c2 are undef.
6779    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6780  };
6781  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6784                                                 {N1, N0.getOperand(1)})) {
6785      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6786      AddToWorklist(IOR.getNode());
6787      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6788    }
6789  }
6790
6791  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6792    return Combined;
6793  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6794    return Combined;
6795
6796  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6797  if (N0.getOpcode() == N1.getOpcode())
6799      return V;
6800
6801  // See if this is some rotate idiom.
6802  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6803    return Rot;
6804
6805  if (SDValue Load = MatchLoadCombine(N))
6806    return Load;
6807
6808  // Simplify the operands using demanded-bits information.
6810    return SDValue(N, 0);
6811
6812  // If OR can be rewritten into ADD, try combines based on ADD.
6813  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6814      DAG.haveNoCommonBitsSet(N0, N1))
6816    return Combined;
6817
6818  return SDValue();
6819}
6820
6822 if (Op.getOpcode() == ISD::AND &&
6823 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6824 Mask = Op.getOperand(1);
6825 return Op.getOperand(0);
6826 }
6827 return Op;
6828}
6829
6830/// Match "(X shl/srl V1) & V2" where V2 may not be present.
6831static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6832 SDValue &Mask) {
6833 Op = stripConstantMask(DAG, Op, Mask);
6834 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6835 Shift = Op;
6836 return true;
6837 }
6838 return false;
6839}
6840
// NOTE(review): this capture elides several original source lines (the
// numbering below is non-contiguous, e.g. 6865-6866 carrying the start of the
// function signature, and the declarations at 6873, 6880, 6921, 6923, 6940,
// 6951, 6953, 6968). The surviving lines are kept byte-for-byte.
6841/// Helper function for visitOR to extract the needed side of a rotate idiom
6842/// from a shl/srl/mul/udiv. This is meant to handle cases where
6843/// InstCombine merged some outside op with one of the shifts from
6844/// the rotate pattern.
6845/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6846/// Otherwise, returns an expansion of \p ExtractFrom based on the following
6847/// patterns:
6848///
6849/// (or (add v v) (shrl v bitwidth-1)):
6850/// expands (add v v) -> (shl v 1)
6851///
6852/// (or (mul v c0) (shrl (mul v c1) c2)):
6853/// expands (mul v c0) -> (shl (mul v c1) c3)
6854///
6855/// (or (udiv v c0) (shl (udiv v c1) c2)):
6856/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
6857///
6858/// (or (shl v c0) (shrl (shl v c1) c2)):
6859/// expands (shl v c0) -> (shl (shl v c1) c3)
6860///
6861/// (or (shrl v c0) (shl (shrl v c1) c2)):
6862/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
6863///
6864/// Such that in all cases, c3+c2==bitwidth(op v c1).
6867 const SDLoc &DL) {
6868 assert(OppShift && ExtractFrom && "Empty SDValue");
6869 assert(
6870 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6871 "Existing shift must be valid as a rotate half");
6872
6874
6875 // Value and Type of the shift.
6876 SDValue OppShiftLHS = OppShift.getOperand(0);
6877 EVT ShiftedVT = OppShiftLHS.getValueType();
6878
6879 // Amount of the existing shift.
// NOTE(review): the declaration of OppShiftCst (isConstOrConstSplat of
// OppShift's amount operand, per its uses below) was elided here.
6881
6882 // (add v v) -> (shl v 1)
6883 // TODO: Should this be a general DAG canonicalization?
6884 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6885 ExtractFrom.getOpcode() == ISD::ADD &&
6886 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6887 ExtractFrom.getOperand(0) == OppShiftLHS &&
6888 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6889 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6891
6892 // Preconditions:
6893 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6894 //
6895 // Find opcode of the needed shift to be extracted from (op0 v c0).
6896 unsigned Opcode = ISD::DELETED_NODE;
6897 bool IsMulOrDiv = false;
6898 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6899 // opcode or its arithmetic (mul or udiv) variant.
6900 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6901 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6902 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6903 return false;
6904 Opcode = NeededShift;
6905 return true;
6906 };
6907 // op0 must be either the needed shift opcode or the mul/udiv equivalent
6908 // that the needed shift can be extracted from.
6909 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6910 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6911 return SDValue();
6912
6913 // op0 must be the same opcode on both sides, have the same LHS argument,
6914 // and produce the same value type.
6915 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6916 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6917 ShiftedVT != ExtractFrom.getValueType())
6918 return SDValue();
6919
6920 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
// NOTE(review): the declaration of OppLHSCst was elided here (line 6921).
6922 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6924 isConstOrConstSplat(ExtractFrom.getOperand(1));
6925 // TODO: We should be able to handle non-uniform constant vectors for these values
6926 // Check that we have constant values.
6927 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6928 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6929 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6930 return SDValue();
6931
6932 // Compute the shift amount we need to extract to complete the rotate.
6933 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6934 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6935 return SDValue();
6936 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6937 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6938 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6939 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6941
6942 // Now try extract the needed shift from the ExtractFrom op and see if the
6943 // result matches up with the existing shift's LHS op.
6944 if (IsMulOrDiv) {
6945 // Op to extract from is a mul or udiv by a constant.
6946 // Check:
6947 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6948 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6949 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6950 NeededShiftAmt.getZExtValue());
6952 APInt Rem;
// NOTE(review): the udivrem call computing ResultAmt/Rem was elided (6953).
6954 if (Rem != 0 || ResultAmt != OppLHSAmt)
6955 return SDValue();
6956 } else {
6957 // Op to extract from is a shift by a constant.
6958 // Check:
6959 // c2 - (bitwidth(op0 v c0) - c1) == c0
6960 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6961 ExtractFromAmt.getBitWidth()))
6962 return SDValue();
6963 }
6964
6965 // Return the expanded shift op that should allow a rotate to be formed.
6966 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6967 EVT ResVT = ExtractFrom.getValueType();
6969 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6970}
6971
// NOTE(review): this capture elides a few original lines (7022, 7036, 7044,
// 7080 — by their uses below these declared NegC/PosC via isConstOrConstSplat
// guards). Surviving lines are kept byte-for-byte.
6972// Return true if we can prove that, whenever Neg and Pos are both in the
6973// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6974// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6975//
6976// (or (shift1 X, Neg), (shift2 X, Pos))
6977//
6978// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6979// in direction shift1 by Neg. The range [0, EltSize) means that we only need
6980// to consider shift amounts with defined behavior.
6981//
6982// The IsRotate flag should be set when the LHS of both shifts is the same.
6983// Otherwise if matching a general funnel shift, it should be clear.
6984static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6985 SelectionDAG &DAG, bool IsRotate) {
6986 // If EltSize is a power of 2 then:
6987 //
6988 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6989 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6990 //
6991 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6992 // for the stronger condition:
6993 //
6994 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6995 //
6996 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6997 // we can just replace Neg with Neg' for the rest of the function.
6998 //
6999 // In other cases we check for the even stronger condition:
7000 //
7001 // Neg == EltSize - Pos [B]
7002 //
7003 // for all Neg and Pos. Note that the (or ...) then invokes undefined
7004 // behavior if Pos == 0 (and consequently Neg == EltSize).
7005 //
7006 // We could actually use [A] whenever EltSize is a power of 2, but the
7007 // only extra cases that it would match are those uninteresting ones
7008 // where Neg and Pos are never in range at the same time. E.g. for
7009 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
7010 // as well as (sub 32, Pos), but:
7011 //
7012 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
7013 //
7014 // always invokes undefined behavior for 32-bit X.
7015 //
7016 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
7017 //
7018 // NOTE: We can only do this when matching an AND and not a general
7019 // funnel shift.
7020 unsigned MaskLoBits = 0;
7021 if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
7023 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
7024 unsigned Bits = Log2_64(EltSize);
7025 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
7026 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
7027 Neg = Neg.getOperand(0);
7028 MaskLoBits = Bits;
7029 }
7030 }
7031 }
7032
7033 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
7034 if (Neg.getOpcode() != ISD::SUB)
7035 return false;
7037 if (!NegC)
7038 return false;
7039 SDValue NegOp1 = Neg.getOperand(1);
7040
7041 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
7042 // Pos'. The truncation is redundant for the purpose of the equality.
7043 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
7045 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
7046 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
7047 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
7048 MaskLoBits))
7049 Pos = Pos.getOperand(0);
7050 }
7051 }
7052
7053 // The condition we need is now:
7054 //
7055 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
7056 //
7057 // If NegOp1 == Pos then we need:
7058 //
7059 // EltSize & Mask == NegC & Mask
7060 //
7061 // (because "x & Mask" is a truncation and distributes through subtraction).
7062 //
7063 // We also need to account for a potential truncation of NegOp1 if the amount
7064 // has already been legalized to a shift amount type.
// Width accumulates the value that must equal EltSize (modulo Mask) for the
// rotate/funnel equivalence to hold; see the final check below.
7065 APInt Width;
7066 if ((Pos == NegOp1) ||
7067 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
7068 Width = NegC->getAPIntValue();
7069
7070 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
7071 // Then the condition we want to prove becomes:
7072 //
7073 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
7074 //
7075 // which, again because "x & Mask" is a truncation, becomes:
7076 //
7077 // NegC & Mask == (EltSize - PosC) & Mask
7078 // EltSize & Mask == (NegC + PosC) & Mask
7079 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
7081 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
7082 else
7083 return false;
7084 } else
7085 return false;
7086
7087 // Now we just need to check that EltSize & Mask == Width & Mask.
7088 if (MaskLoBits)
7089 // EltSize & Mask is 0 since Mask is EltSize - 1.
7090 return Width.getLoBits(MaskLoBits) == 0;
7091 return Width == EltSize;
7092}
7093
// NOTE(review): lines 7100 (middle of the signature), 7111 and 7113 (the
// matchRotateSub call and the HasPos computation, per the uses below) were
// elided by this capture; surviving lines are kept byte-for-byte.
7094// A subroutine of MatchRotate used once we have found an OR of two opposite
7095// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
7096// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
7097// former being preferred if supported. InnerPos and InnerNeg are Pos and
7098// Neg with outer conversions stripped away.
7099SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
7101 SDValue InnerNeg, unsigned PosOpcode,
7102 unsigned NegOpcode, const SDLoc &DL) {
7103 // fold (or (shl x, (*ext y)),
7104 // (srl x, (*ext (sub 32, y)))) ->
7105 // (rotl x, y) or (rotr x, (sub 32, y))
7106 //
7107 // fold (or (shl x, (*ext (sub 32, y))),
7108 // (srl x, (*ext y))) ->
7109 // (rotr x, y) or (rotl x, (sub 32, y))
7110 EVT VT = Shifted.getValueType();
7112 /*IsRotate*/ true)) {
7114 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
7115 HasPos ? Pos : Neg);
7116 }
7117
7118 return SDValue();
7119}
7120
// NOTE(review): this capture elides several lines (7128 mid-signature, 7142,
// 7150, 7161, 7163, 7170, 7172, 7180, 7182 — by context, HasPos selection and
// the legality/xor-pattern guards for the three folds below). Surviving lines
// are kept byte-for-byte.
7121// A subroutine of MatchRotate used once we have found an OR of two opposite
7122// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
7123// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
7124// former being preferred if supported. InnerPos and InnerNeg are Pos and
7125// Neg with outer conversions stripped away.
7126// TODO: Merge with MatchRotatePosNeg.
7127SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
7129 SDValue InnerNeg, unsigned PosOpcode,
7130 unsigned NegOpcode, const SDLoc &DL) {
7131 EVT VT = N0.getValueType();
7132 unsigned EltBits = VT.getScalarSizeInBits();
7133
7134 // fold (or (shl x0, (*ext y)),
7135 // (srl x1, (*ext (sub 32, y)))) ->
7136 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
7137 //
7138 // fold (or (shl x0, (*ext (sub 32, y))),
7139 // (srl x1, (*ext y))) ->
7140 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
7141 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
7143 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
7144 HasPos ? Pos : Neg);
7145 }
7146
7147 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
7148 // so for now just use the PosOpcode case if its legal.
7149 // TODO: When can we use the NegOpcode case?
7151 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
7152 if (Op.getOpcode() != BinOpc)
7153 return false;
7154 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
7155 return Cst && (Cst->getAPIntValue() == Imm);
7156 };
7157
7158 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
7159 // -> (fshl x0, x1, y)
7160 if (IsBinOpImm(N1, ISD::SRL, 1) &&
7162 InnerPos == InnerNeg.getOperand(0) &&
7164 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
7165 }
7166
7167 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
7168 // -> (fshr x0, x1, y)
7169 if (IsBinOpImm(N0, ISD::SHL, 1) &&
7171 InnerNeg == InnerPos.getOperand(0) &&
7173 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7174 }
7175
7176 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
7177 // -> (fshr x0, x1, y)
7178 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
7179 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
7181 InnerNeg == InnerPos.getOperand(0) &&
7183 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7184 }
7185 }
7186
7187 return SDValue();
7188}
7189
// NOTE(review): this capture elides many lines (numbering below jumps at
// 7240-41, 7245-46, 7265-66, 7280-84, 7289, 7293, 7329-30, 7345-46, 7351-52,
// 7358-59, 7364-65). In particular the brace closed at "7316 }" belongs to an
// elided constant-shift-amount "if" opened around 7284-85. Surviving lines
// are kept byte-for-byte.
7190// MatchRotate - Handle an 'or' of two operands. If this is one of the many
7191// idioms for rotate, and if the target supports rotation instructions, generate
7192// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
7193// with different shifted sources.
7194SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
7195 EVT VT = LHS.getValueType();
7196
7197 // The target must have at least one rotate/funnel flavor.
7198 // We still try to match rotate by constant pre-legalization.
7199 // TODO: Support pre-legalization funnel-shift by constant.
7200 bool HasROTL = hasOperation(ISD::ROTL, VT);
7201 bool HasROTR = hasOperation(ISD::ROTR, VT);
7202 bool HasFSHL = hasOperation(ISD::FSHL, VT);
7203 bool HasFSHR = hasOperation(ISD::FSHR, VT);
7204 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7205 return SDValue();
7206
7207 // Check for truncated rotate.
7208 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
7209 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
7210 assert(LHS.getValueType() == RHS.getValueType());
7211 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
7212 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
7213 }
7214 }
7215
7216 // Match "(X shl/srl V1) & V2" where V2 may not be present.
7217 SDValue LHSShift; // The shift.
7218 SDValue LHSMask; // AND value if any.
7219 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
7220
7221 SDValue RHSShift; // The shift.
7222 SDValue RHSMask; // AND value if any.
7223 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
7224
7225 // If neither side matched a rotate half, bail
7226 if (!LHSShift && !RHSShift)
7227 return SDValue();
7228
7229 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
7230 // side of the rotate, so try to handle that here. In all cases we need to
7231 // pass the matched shift from the opposite side to compute the opcode and
7232 // needed shift amount to extract. We still want to do this if both sides
7233 // matched a rotate half because one half may be a potential overshift that
7234 // can be broken down (ie if InstCombine merged two shl or srl ops into a
7235 // single one).
7236
7237 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
7238 if (LHSShift)
7239 if (SDValue NewRHSShift =
7242 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
7243 if (RHSShift)
7244 if (SDValue NewLHSShift =
7247
7248 // If a side is still missing, nothing else we can do.
7249 if (!RHSShift || !LHSShift)
7250 return SDValue();
7251
7252 // At this point we've matched or extracted a shift op on each side.
7253
7254 if (LHSShift.getOpcode() == RHSShift.getOpcode())
7255 return SDValue(); // Shifts must disagree.
7256
7257 // TODO: Support pre-legalization funnel-shift by constant.
7258 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7259 if (!IsRotate && !(HasFSHL || HasFSHR))
7260 return SDValue(); // Requires funnel shift support.
7261
7262 // Canonicalize shl to left side in a shl/srl pair.
7263 if (RHSShift.getOpcode() == ISD::SHL) {
7264 std::swap(LHS, RHS);
7267 }
7268
7269 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7270 SDValue LHSShiftArg = LHSShift.getOperand(0);
7271 SDValue LHSShiftAmt = LHSShift.getOperand(1);
7272 SDValue RHSShiftArg = RHSShift.getOperand(0);
7273 SDValue RHSShiftAmt = RHSShift.getOperand(1);
7274
7275 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7276 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7277 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7278 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7279 // iff C1+C2 == EltSizeInBits
// Lambda body below checks the C1+C2 == EltSizeInBits condition on the two
// constant shift amounts; its declaration line was elided by the capture.
7282 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7283 };
7285 SDValue Res;
7286 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
7287 bool UseROTL = !LegalOperations || HasROTL;
7288 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7290 } else {
7291 bool UseFSHL = !LegalOperations || HasFSHL;
7292 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7294 }
7295
7296 // If there is an AND of either shifted operand, apply it to the result.
7297 if (LHSMask.getNode() || RHSMask.getNode()) {
7298 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7299 SDValue Mask = AllOnes;
7300
7301 if (LHSMask.getNode()) {
7302 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7303 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7304 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7305 }
7306 if (RHSMask.getNode()) {
7307 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7308 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7309 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7310 }
7311
7312 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7313 }
7314
7315 return Res;
7316 }
7317
7318 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
7319 // shift.
7320 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7321 return SDValue();
7322
7323 // If there is a mask here, and we have a variable shift, we can't be sure
7324 // that we're masking out the right stuff.
7325 if (LHSMask.getNode() || RHSMask.getNode())
7326 return SDValue();
7327
7328 // If the shift amount is sign/zext/any-extended just peel it off.
// NOTE(review): the declarations of LExtOp0/RExtOp0 (initialized from the
// shift amounts, per their uses below) were elided here (7329-7330).
7331 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7332 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7333 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7334 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7335 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7336 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7337 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7338 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7339 LExtOp0 = LHSShiftAmt.getOperand(0);
7340 RExtOp0 = RHSShiftAmt.getOperand(0);
7341 }
7342
7343 if (IsRotate && (HasROTL || HasROTR)) {
7344 SDValue TryL =
7347 if (TryL)
7348 return TryL;
7349
7350 SDValue TryR =
7353 if (TryR)
7354 return TryR;
7355 }
7356
7357 SDValue TryL =
7360 if (TryL)
7361 return TryL;
7362
7363 SDValue TryR =
7366 if (TryR)
7367 return TryR;
7368
7369 return SDValue();
7370}
7371
7372namespace {
7373
7374/// Represents known origin of an individual byte in load combine pattern. The
7375/// value of the byte is either constant zero or comes from memory.
7376struct ByteProvider {
7377 // For constant zero providers Load is set to nullptr. For memory providers
7378 // Load represents the node which loads the byte from memory.
7379 // ByteOffset is the offset of the byte in the value produced by the load.
7380 LoadSDNode *Load = nullptr;
7381 unsigned ByteOffset = 0;
7382
7383 ByteProvider() = default;
7384
7385 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7386 return ByteProvider(Load, ByteOffset);
7387 }
7388
7389 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7390
7391 bool isConstantZero() const { return !Load; }
7392 bool isMemory() const { return Load; }
7393
7394 bool operator==(const ByteProvider &Other) const {
7395 return Other.Load == Load && Other.ByteOffset == ByteOffset;
7396 }
7397
7398private:
7399 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7400 : Load(Load), ByteOffset(ByteOffset) {}
7401};
7402
7403} // end anonymous namespace
7404
// NOTE(review): this capture elides three lines (7457, 7471, 7490) which — per
// the uses just below each gap — declared ByteShift and the two
// NarrowByteWidth locals as the corresponding bit-width divided by 8.
// Surviving lines are kept byte-for-byte.
7405/// Recursively traverses the expression calculating the origin of the requested
7406/// byte of the given value. Returns None if the provider can't be calculated.
7407///
7408/// For all the values except the root of the expression verifies that the value
7409/// has exactly one use and if it's not true return None. This way if the origin
7410/// of the byte is returned it's guaranteed that the values which contribute to
7411/// the byte are not used outside of this expression.
7412///
7413/// Because the parts of the expression are not allowed to have more than one
7414/// use this function iterates over trees, not DAGs. So it never visits the same
7415/// node more than once.
7416static const Optional<ByteProvider>
7417calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7418 bool Root = false) {
7419 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
7420 if (Depth == 10)
7421 return None;
7422
7423 if (!Root && !Op.hasOneUse())
7424 return None;
7425
7426 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7427 unsigned BitWidth = Op.getValueSizeInBits();
7428 if (BitWidth % 8 != 0)
7429 return None;
7430 unsigned ByteWidth = BitWidth / 8;
7431 assert(Index < ByteWidth && "invalid index requested");
7432 (void) ByteWidth;
7433
7434 switch (Op.getOpcode()) {
7435 case ISD::OR: {
7436 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7437 if (!LHS)
7438 return None;
7439 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7440 if (!RHS)
7441 return None;
7442
7443 if (LHS->isConstantZero())
7444 return RHS;
7445 if (RHS->isConstantZero())
7446 return LHS;
7447 return None;
7448 }
7449 case ISD::SHL: {
7450 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7451 if (!ShiftOp)
7452 return None;
7453
7454 uint64_t BitShift = ShiftOp->getZExtValue();
7455 if (BitShift % 8 != 0)
7456 return None;
7458
7459 return Index < ByteShift
7460 ? ByteProvider::getConstantZero()
7461 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7462 Depth + 1);
7463 }
7464 case ISD::ANY_EXTEND:
7465 case ISD::SIGN_EXTEND:
7466 case ISD::ZERO_EXTEND: {
7467 SDValue NarrowOp = Op->getOperand(0);
7468 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7469 if (NarrowBitWidth % 8 != 0)
7470 return None;
7472
7473 if (Index >= NarrowByteWidth)
7474 return Op.getOpcode() == ISD::ZERO_EXTEND
7475 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7476 : None;
7477 return calculateByteProvider(NarrowOp, Index, Depth + 1);
7478 }
7479 case ISD::BSWAP:
7480 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7481 Depth + 1);
7482 case ISD::LOAD: {
7483 auto L = cast<LoadSDNode>(Op.getNode());
7484 if (!L->isSimple() || L->isIndexed())
7485 return None;
7486
7487 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7488 if (NarrowBitWidth % 8 != 0)
7489 return None;
7491
7492 if (Index >= NarrowByteWidth)
7493 return L->getExtensionType() == ISD::ZEXTLOAD
7494 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7495 : None;
7496 return ByteProvider::getMemory(L, Index);
7497 }
7498 }
7499
7500 return None;
7501}
7502
/// Position in memory of byte \p i of a \p BW-byte little-endian value: the
/// i-th least significant byte sits at offset i.
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW; // The width does not affect the mapping in little-endian layout.
  return i;
}
7506
/// Position in memory of byte \p i of a \p BW-byte big-endian value: the
/// least significant byte sits at the highest offset.
static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  return BW - 1 - i;
}
7510
// NOTE(review): the signature line (7514, presumably taking the ByteOffsets
// array — TODO confirm) and the loop lines 7523-7524 (which, per the
// BigEndian line kept below, computed CurrentByteOffset and the matching
// LittleEndian update) were elided by this capture. Surviving lines are kept
// byte-for-byte.
7511// Check if the bytes offsets we are looking at match with either big or
7512// little endian value loaded. Return true for big endian, false for little
7513// endian, and None if match failed.
7515 int64_t FirstOffset) {
7516 // The endian can be decided only when it is 2 bytes at least.
7517 unsigned Width = ByteOffsets.size();
7518 if (Width < 2)
7519 return None;
7520
7521 bool BigEndian = true, LittleEndian = true;
7522 for (unsigned i = 0; i < Width; i++) {
7525 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7526 if (!BigEndian && !LittleEndian)
7527 return None;
7528 }
7529
7530 assert((BigEndian != LittleEndian) && "It should be either big endian or"
7531 "little endian");
7532 return BigEndian;
7533}
7534
// NOTE(review): the declaration line of this helper (original line 7535,
// stripTruncAndExt) was elided by this capture. The visible body recursively
// peels truncate/extend wrappers and returns the underlying value.
7536 switch (Value.getOpcode()) {
7537 case ISD::TRUNCATE:
7538 case ISD::ZERO_EXTEND:
7539 case ISD::SIGN_EXTEND:
7540 case ISD::ANY_EXTEND:
7541 return stripTruncAndExt(Value.getOperand(0));
7542 }
7543 return Value;
7544}
7545
// NOTE(review): this capture elides a number of lines (the embedded numbering
// jumps at 7586, 7606, 7612, 7616, 7639, 7646-47, 7653, 7668, 7670, 7722,
// 7729, 7732-33, 7736). By the uses below, those include the Stores vector,
// the WideVT computation, the Base/SourceValue declarations, and the
// truncate/bswap/rotate node creation. Surviving lines are byte-for-byte.
7546/// Match a pattern where a wide type scalar value is stored by several narrow
7547/// stores. Fold it into a single store or a BSWAP and a store if the targets
7548/// supports it.
7549///
7550/// Assuming little endian target:
7551/// i8 *p = ...
7552/// i32 val = ...
7553/// p[0] = (val >> 0) & 0xFF;
7554/// p[1] = (val >> 8) & 0xFF;
7555/// p[2] = (val >> 16) & 0xFF;
7556/// p[3] = (val >> 24) & 0xFF;
7557/// =>
7558/// *((i32)p) = val;
7559///
7560/// i8 *p = ...
7561/// i32 val = ...
7562/// p[0] = (val >> 24) & 0xFF;
7563/// p[1] = (val >> 16) & 0xFF;
7564/// p[2] = (val >> 8) & 0xFF;
7565/// p[3] = (val >> 0) & 0xFF;
7566/// =>
7567/// *((i32)p) = BSWAP(val);
7568SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7569 // The matching looks for "store (trunc x)" patterns that appear early but are
7570 // likely to be replaced by truncating store nodes during combining.
7571 // TODO: If there is evidence that running this later would help, this
7572 // limitation could be removed. Legality checks may need to be added
7573 // for the created store and optional bswap/rotate.
7574 if (LegalOperations || OptLevel == CodeGenOpt::None)
7575 return SDValue();
7576
7577 // We only handle merging simple stores of 1-4 bytes.
7578 // TODO: Allow unordered atomics when wider type is legal (see D66309)
7579 EVT MemVT = N->getMemoryVT();
7580 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7581 !N->isSimple() || N->isIndexed())
7582 return SDValue();
7583
7584 // Collect all of the stores in the chain.
7585 SDValue Chain = N->getChain();
7587 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7588 // All stores must be the same size to ensure that we are writing all of the
7589 // bytes in the wide value.
7590 // TODO: We could allow multiple sizes by tracking each stored byte.
7591 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7592 Store->isIndexed())
7593 return SDValue();
7594 Stores.push_back(Store);
7595 Chain = Store->getChain();
7596 }
7597 // There is no reason to continue if we do not have at least a pair of stores.
7598 if (Stores.size() < 2)
7599 return SDValue();
7600
7601 // Handle simple types only.
7602 LLVMContext &Context = *DAG.getContext();
7603 unsigned NumStores = Stores.size();
7604 unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7605 unsigned WideNumBits = NumStores * NarrowNumBits;
7607 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7608 return SDValue();
7609
7610 // Check if all bytes of the source value that we are looking at are stored
7611 // to the same base address. Collect offsets from Base address into OffsetMap.
7613 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7614 int64_t FirstOffset = INT64_MAX;
7615 StoreSDNode *FirstStore = nullptr;
7617 for (auto Store : Stores) {
7618 // All the stores store different parts of the CombinedValue. A truncate is
7619 // required to get the partial value.
7620 SDValue Trunc = Store->getValue();
7621 if (Trunc.getOpcode() != ISD::TRUNCATE)
7622 return SDValue();
7623 // Other than the first/last part, a shift operation is required to get the
7624 // offset.
7625 int64_t Offset = 0;
7626 SDValue WideVal = Trunc.getOperand(0);
7627 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7628 isa<ConstantSDNode>(WideVal.getOperand(1))) {
7629 // The shift amount must be a constant multiple of the narrow type.
7630 // It is translated to the offset address in the wide source value "y".
7631 //
7632 // x = srl y, ShiftAmtC
7633 // i8 z = trunc x
7634 // store z, ...
7635 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7636 if (ShiftAmtC % NarrowNumBits != 0)
7637 return SDValue();
7638
7640 WideVal = WideVal.getOperand(0);
7641 }
7642
7643 // Stores must share the same source value with different offsets.
7644 // Truncate and extends should be stripped to get the single source value.
7645 if (!SourceValue)
7648 return SDValue();
7649 else if (SourceValue.getValueType() != WideVT) {
7650 if (WideVal.getValueType() == WideVT ||
7651 WideVal.getScalarValueSizeInBits() >
7652 SourceValue.getScalarValueSizeInBits())
7654 // Give up if the source value type is smaller than the store size.
7655 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7656 return SDValue();
7657 }
7658
7659 // Stores must share the same base address.
7660 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7661 int64_t ByteOffsetFromBase = 0;
7662 if (!Base)
7663 Base = Ptr;
7664 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7665 return SDValue();
7666
7667 // Remember the first store.
7669 FirstStore = Store;
7671 }
7672 // Map the offset in the store and the offset in the combined value, and
7673 // early return if it has been set before.
7674 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7675 return SDValue();
7676 OffsetMap[Offset] = ByteOffsetFromBase;
7677 }
7678
7679 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7680 assert(FirstStore && "First store must be set");
7681
7682 // Check that a store of the wide type is both allowed and fast on the target
7683 const DataLayout &Layout = DAG.getDataLayout();
7684 bool Fast = false;
7685 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7686 *FirstStore->getMemOperand(), &Fast);
7687 if (!Allowed || !Fast)
7688 return SDValue();
7689
7690 // Check if the pieces of the value are going to the expected places in memory
7691 // to merge the stores.
7692 auto checkOffsets = [&](bool MatchLittleEndian) {
7693 if (MatchLittleEndian) {
7694 for (unsigned i = 0; i != NumStores; ++i)
7695 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7696 return false;
7697 } else { // MatchBigEndian by reversing loop counter.
7698 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7699 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7700 return false;
7701 }
7702 return true;
7703 };
7704
7705 // Check if the offsets line up for the native data layout of this target.
7706 bool NeedBswap = false;
7707 bool NeedRotate = false;
7708 if (!checkOffsets(Layout.isLittleEndian())) {
7709 // Special-case: check if byte offsets line up for the opposite endian.
7710 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7711 NeedBswap = true;
7712 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7713 NeedRotate = true;
7714 else
7715 return SDValue();
7716 }
7717
7718 SDLoc DL(N);
7719 if (WideVT != SourceValue.getValueType()) {
7720 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7721 "Unexpected store value to merge");
7723 }
7724
7725 // Before legalize we can introduce illegal bswaps/rotates which will be later
7726 // converted to an explicit bswap sequence. This way we end up with a single
7727 // store and byte shuffling instead of several stores and byte shuffling.
7728 if (NeedBswap) {
7730 } else if (NeedRotate) {
7731 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7734 }
7735
7737 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7738 FirstStore->getPointerInfo(), FirstStore->getAlign());
7739
7740 // Rely on other DAG combine rules to remove the other individual stores.
7741 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7742 return NewStore;
7743}
7744
7745/// Match a pattern where a wide type scalar value is loaded by several narrow
7746/// loads and combined by shifts and ors. Fold it into a single load or a load
7747/// and a BSWAP if the targets supports it.
7748///
7749/// Assuming little endian target:
7750/// i8 *a = ...
7751/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7752/// =>
7753/// i32 val = *((i32)a)
7754///
7755/// i8 *a = ...
7756/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7757/// =>
7758/// i32 val = BSWAP(*((i32)a))
7759///
7760/// TODO: This rule matches complex patterns with OR node roots and doesn't
7761/// interact well with the worklist mechanism. When a part of the pattern is
7762/// updated (e.g. one of the loads) its direct users are put into the worklist,
7763/// but the root node of the pattern which triggers the load combine is not
7764/// necessarily a direct user of the changed node. For example, once the address
7765/// of t28 load is reassociated load combine won't be triggered:
7766/// t25: i32 = add t4, Constant:i32<2>
7767/// t26: i64 = sign_extend t25
7768/// t27: i64 = add t2, t26
7769/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7770/// t29: i32 = zero_extend t28
7771/// t32: i32 = shl t29, Constant:i8<8>
7772/// t33: i32 = or t23, t32
7773/// As a possible fix visitLoad can check if the load can be a part of a load
7774/// combine pattern and add corresponding OR roots to the worklist.
7775SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
 // NOTE(review): this listing elides several original source lines (e.g. the
 // declarations of IsBigEndianTarget, Base, Loads, FirstByteProvider); the
 // statements below are reproduced verbatim and only comments were added.
7776 assert(N->getOpcode() == ISD::OR &&
7777 "Can only match load combining against OR nodes");
7778
7779 // Handles simple types only
7780 EVT VT = N->getValueType(0);
7781 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7782 return SDValue();
7783 unsigned ByteWidth = VT.getSizeInBits() / 8;
7784
 // Map a matched memory byte to its offset inside the narrow load that
 // produced it, honoring the target's endianness.
7786 auto MemoryByteOffset = [&] (ByteProvider P) {
7787 assert(P.isMemory() && "Must be a memory byte provider");
7788 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7789 assert(LoadBitWidth % 8 == 0 &&
7790 "can only analyze providers for individual bytes not bit");
7791 unsigned LoadByteWidth = LoadBitWidth / 8;
7792 return IsBigEndianTarget
7793 ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7794 : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7795 };
7796
7798 SDValue Chain;
7799
7802 int64_t FirstOffset = INT64_MAX;
7803
7804 // Check if all the bytes of the OR we are looking at are loaded from the same
7805 // base address. Collect bytes offsets from Base address in ByteOffsets.
7807 unsigned ZeroExtendedBytes = 0;
 // Walk bytes most-significant first so a run of constant-zero high bytes can
 // be recognized as a zero-extended narrower load.
7808 for (int i = ByteWidth - 1; i >= 0; --i) {
7809 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7810 if (!P)
7811 return SDValue();
7812
7813 if (P->isConstantZero()) {
7814 // It's OK for the N most significant bytes to be 0, we can just
7815 // zero-extend the load.
7816 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7817 return SDValue();
7818 continue;
7819 }
7820 assert(P->isMemory() && "provenance should either be memory or zero");
7821
7822 LoadSDNode *L = P->Load;
7823 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7824 !L->isIndexed() &&
7825 "Must be enforced by calculateByteProvider");
7826 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7827
7828 // All loads must share the same chain
7829 SDValue LChain = L->getChain();
7830 if (!Chain)
7831 Chain = LChain;
7832 else if (Chain != LChain)
7833 return SDValue();
7834
7835 // Loads must share the same base address
7837 int64_t ByteOffsetFromBase = 0;
7838 if (!Base)
7839 Base = Ptr;
7840 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7841 return SDValue();
7842
7843 // Calculate the offset of the current byte from the base address
7846
7847 // Remember the first byte load
7851 }
7852
7853 Loads.insert(L);
7854 }
7855 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7856 "memory, so there must be at least one load which produces the value");
7857 assert(Base && "Base address of the accessed memory location must be set");
7858 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7859
 // If any high bytes were constant zero the combined load must be extended.
7860 bool NeedsZext = ZeroExtendedBytes > 0;
7861
7862 EVT MemVT =
7864
7865 if (!MemVT.isSimple())
7866 return SDValue();
7867
7868 // Before legalize we can introduce too wide illegal loads which will be later
7869 // split into legal sized loads. This enables us to combine i64 load by i8
7870 // patterns to a couple of i32 loads on 32 bit targets.
7871 if (LegalOperations &&
7873 MemVT))
7874 return SDValue();
7875
7876 // Check if the bytes of the OR we are looking at match with either big or
7877 // little endian value load
7878 Optional<bool> IsBigEndian = isBigEndian(
7880 if (!IsBigEndian.hasValue())
7881 return SDValue();
7882
7883 assert(FirstByteProvider && "must be set");
7884
7885 // Ensure that the first byte is loaded from zero offset of the first load.
7886 // So the combined value can be loaded from the first load address.
7888 return SDValue();
7890
7891 // The node we are looking at matches with the pattern, check if we can
7892 // replace it with a single (possibly zero-extended) load and bswap + shift if
7893 // needed.
7894
7895 // If the load needs byte swap check if the target supports it
7896 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7897
7898 // Before legalize we can introduce illegal bswaps which will be later
7899 // converted to an explicit bswap sequence. This way we end up with a single
7900 // load and byte shuffling instead of several loads and byte shuffling.
7901 // We do not introduce illegal bswaps when zero-extending as this tends to
7902 // introduce too many arithmetic instructions.
7903 if (NeedsBswap && (LegalOperations || NeedsZext) &&
7904 !TLI.isOperationLegal(ISD::BSWAP, VT))
7905 return SDValue();
7906
7907 // If we need to bswap and zero extend, we have to insert a shift. Check that
7908 // it is legal.
7909 if (NeedsBswap && NeedsZext && LegalOperations &&
7910 !TLI.isOperationLegal(ISD::SHL, VT))
7911 return SDValue();
7912
7913 // Check that a load of the wide type is both allowed and fast on the target
7914 bool Fast = false;
7915 bool Allowed =
7917 *FirstLoad->getMemOperand(), &Fast);
7918 if (!Allowed || !Fast)
7919 return SDValue();
7920
7923 Chain, FirstLoad->getBasePtr(),
7925
7926 // Transfer chain users from old loads to the new load.
7927 for (LoadSDNode *L : Loads)
7928 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7929
7930 if (!NeedsBswap)
7931 return NewLoad;
7932
 // Zero-extended + byte-swapped: shift the value up before the bswap so the
 // loaded bytes land in the correct (high) positions.
7934 NeedsZext
7935 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7937 SDLoc(N), LegalOperations))
7938 : NewLoad;
7939 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7940}
7941
7942// If the target has andn, bsl, or a similar bit-select instruction,
7943// we want to unfold masked merge, with canonical pattern of:
7944// | A | |B|
7945// ((x ^ y) & m) ^ y
7946// | D |
7947// Into:
7948// (x & m) | (y & ~m)
7949// If y is a constant, m is not a 'not', and the 'andn' does not work with
7950// immediates, we unfold into a different pattern:
7951// ~(~x & m) & (m | y)
7952// If x is a constant, m is a 'not', and the 'andn' does not work with
7953// immediates, we unfold into a different pattern:
7954// (x | ~m) & ~(~m & ~y)
7955// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7956// the very least that breaks andnpd / andnps patterns, and because those
7957// patterns are simplified in IR and shouldn't be created in the DAG
7958SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
 // NOTE(review): this listing elides a few original source lines inside the
 // matcher lambda; code below is kept verbatim, only comments were added.
7959 assert(N->getOpcode() == ISD::XOR);
7960
7961 // Don't touch 'not' (i.e. where y = -1).
7962 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7963 return SDValue();
7964
7965 EVT VT = N->getValueType(0);
7966
7967 // There are 3 commutable operators in the pattern,
7968 // so we have to deal with 8 possible variants of the basic pattern.
7969 SDValue X, Y, M;
 // On success, captures X and Y from the inner (x ^ y) and the mask M from
 // the AND's other operand.
7970 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7971 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7972 return false;
7974 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7975 return false;
7976 SDValue Xor0 = Xor.getOperand(0);
7977 SDValue Xor1 = Xor.getOperand(1);
7978 // Don't touch 'not' (i.e. where y = -1).
7980 return false;
7981 if (Other == Xor0)
7983 if (Other != Xor1)
7984 return false;
7985 X = Xor0;
7986 Y = Xor1;
7987 M = And.getOperand(XorIdx ? 0 : 1);
7988 return true;
7989 };
7990
7991 SDValue N0 = N->getOperand(0);
7992 SDValue N1 = N->getOperand(1);
 // Try all four placements of the AND/XOR pair around the root XOR.
7993 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7994 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7995 return SDValue();
7996
7997 // Don't do anything if the mask is constant. This should not be reachable.
7998 // InstCombine should have already unfolded this pattern, and DAGCombiner
7999 // probably shouldn't produce it, too.
8000 if (isa<ConstantSDNode>(M.getNode()))
8001 return SDValue();
8002
8003 // We can transform if the target has AndNot
8004 if (!TLI.hasAndNot(M))
8005 return SDValue();
8006
8007 SDLoc DL(N);
8008
8009 // If Y is a constant, check that 'andn' works with immediates. Unless M is
8010 // a bitwise not that would already allow ANDN to be used.
8011 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
8012 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
8013 // If not, we need to do a bit more work to make sure andn is still used.
8014 SDValue NotX = DAG.getNOT(DL, X, VT);
8015 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
8016 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
8017 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
8018 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
8019 }
8020
8021 // If X is a constant and M is a bitwise not, check that 'andn' works with
8022 // immediates.
8023 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
8024 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
8025 // If not, we need to do a bit more work to make sure andn is still used.
8026 SDValue NotM = M.getOperand(0);
8027 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
8028 SDValue NotY = DAG.getNOT(DL, Y, VT);
8029 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
8030 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
8031 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
8032 }
8033
 // Canonical unfold: (x & m) | (y & ~m).
8034 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
8035 SDValue NotM = DAG.getNOT(DL, M, VT);
8036 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
8037
8038 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
8039}
8040
8041SDValue DAGCombiner::visitXOR(SDNode *N) {
 // Combine step for ISD::XOR nodes: a sequence of pattern folds tried in
 // order; each returns immediately on success.
 // NOTE(review): this listing elides several original source lines (e.g. the
 // conditions guarding some folds); code below is kept verbatim, only
 // comments were added.
8042 SDValue N0 = N->getOperand(0);
8043 SDValue N1 = N->getOperand(1);
8044 EVT VT = N0.getValueType();
8045 SDLoc DL(N);
8046
8047 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
8048 if (N0.isUndef() && N1.isUndef())
8049 return DAG.getConstant(0, DL, VT);
8050
8051 // fold (xor x, undef) -> undef
8052 if (N0.isUndef())
8053 return N0;
8054 if (N1.isUndef())
8055 return N1;
8056
8057 // fold (xor c1, c2) -> c1^c2
8058 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
8059 return C;
8060
8061 // canonicalize constant to RHS
8064 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
8065
8066 // fold vector ops
8067 if (VT.isVector()) {
8069 return FoldedVOp;
8070
8071 // fold (xor x, 0) -> x, vector edition
8073 return N0;
8074 }
8075
8076 // fold (xor x, 0) -> x
8077 if (isNullConstant(N1))
8078 return N0;
8079
8081 return NewSel;
8082
8083 // reassociate xor
8084 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
8085 return RXOR;
8086
8087 // fold !(x cc y) -> (x !cc y)
8088 unsigned N0Opcode = N0.getOpcode();
8089 SDValue LHS, RHS, CC;
8090 if (TLI.isConstTrueVal(N1) &&
8091 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
8093 LHS.getValueType());
8094 if (!LegalOperations ||
8095 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
8096 switch (N0Opcode) {
8097 default:
8098 llvm_unreachable("Unhandled SetCC Equivalent!");
8099 case ISD::SETCC:
8100 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
8101 case ISD::SELECT_CC:
8102 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
8103 N0.getOperand(3), NotCC);
8104 case ISD::STRICT_FSETCC:
8105 case ISD::STRICT_FSETCCS: {
8106 if (N0.hasOneUse()) {
8107 // FIXME Can we handle multiple uses? Could we token factor the chain
8108 // results from the new/old setcc?
8109 SDValue SetCC =
8110 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
8112 CombineTo(N, SetCC);
 // Strict FP setcc also produces a chain result; forward it to the
 // replacement node before deleting the old one.
8113 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
8114 recursivelyDeleteUnusedNodes(N0.getNode());
8115 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8116 }
8117 break;
8118 }
8119 }
8120 }
8121 }
8122
8123 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
8124 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8125 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
8126 SDValue V = N0.getOperand(0);
8127 SDLoc DL0(N0);
8128 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
8129 DAG.getConstant(1, DL0, V.getValueType()));
8130 AddToWorklist(V.getNode());
8131 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
8132 }
8133
8134 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
8135 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
8136 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8137 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8139 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8140 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8141 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8142 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8143 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8144 }
8145 }
8146 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
8147 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
8148 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8149 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8151 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8152 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8153 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8154 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8155 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8156 }
8157 }
8158
8159 // fold (not (neg x)) -> (add X, -1)
8160 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
8161 // Y is a constant or the subtract has a single use.
8162 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
8163 isNullConstant(N0.getOperand(0))) {
8164 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
8165 DAG.getAllOnesConstant(DL, VT));
8166 }
8167
8168 // fold (not (add X, -1)) -> (neg X)
8169 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
8171 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
8172 N0.getOperand(0));
8173 }
8174
8175 // fold (xor (and x, y), y) -> (and (not x), y)
8176 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
8177 SDValue X = N0.getOperand(0);
8178 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
8179 AddToWorklist(NotX.getNode());
8180 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
8181 }
8182
 // fold xor of a one-use shift by constant with a matching shifted all-ones
 // constant (details in the comments inside the block).
8183 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
8186 unsigned BitWidth = VT.getScalarSizeInBits();
8187 if (XorC && ShiftC) {
8188 // Don't crash on an oversized shift. We can not guarantee that a bogus
8189 // shift has been simplified to undef.
8190 uint64_t ShiftAmt = ShiftC->getLimitedValue();
8191 if (ShiftAmt < BitWidth) {
8193 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
8194 if (XorC->getAPIntValue() == Ones) {
8195 // If the xor constant is a shifted -1, do a 'not' before the shift:
8196 // xor (X << ShiftC), XorC --> (not X) << ShiftC
8197 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
8198 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
8199 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
8200 }
8201 }
8202 }
8203 }
8204
8205 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
8206 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
8207 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
8208 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
8209 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
8210 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
8211 SDValue S0 = S.getOperand(0);
8212 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
8214 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
8215 return DAG.getNode(ISD::ABS, DL, VT, S0);
8216 }
8217 }
8218
8219 // fold (xor x, x) -> 0
8220 if (N0 == N1)
8221 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
8222
8223 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
8224 // Here is a concrete example of this equivalence:
8225 // i16 x == 14
8226 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
8227 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
8228 //
8229 // =>
8230 //
8231 // i16 ~1 == 0b1111111111111110
8232 // i16 rol(~1, 14) == 0b1011111111111111
8233 //
8234 // Some additional tips to help conceptualize this transform:
8235 // - Try to see the operation as placing a single zero in a value of all ones.
8236 // - There exists no value for x which would allow the result to contain zero.
8237 // - Values of x larger than the bitwidth are undefined and do not require a
8238 // consistent result.
8239 // - Pushing the zero left requires shifting one bits in from the right.
8240 // A rotate left of ~1 is a nice way of achieving the desired result.
8243 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
8244 N0.getOperand(1));
8245 }
8246
8247 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
8248 if (N0Opcode == N1.getOpcode())
8250 return V;
8251
8252 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
8254 return MM;
8255
8256 // Simplify the expression using non-local knowledge.
8258 return SDValue(N, 0);
8259
8260 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8261 return Combined;
8262
8263 return SDValue();
8264}
8265
8266/// If we have a shift-by-constant of a bitwise logic op that itself has a
8267/// shift-by-constant operand with identical opcode, we may be able to convert
8268/// that into 2 independent shifts followed by the logic op. This is a
8269/// throughput improvement.
// NOTE(review): the function signature line (combineShiftOfShiftedLogic,
// taking the shift node and the SelectionDAG) is elided in this listing;
// the body below is reproduced verbatim with comments added only.
8271 // Match a one-use bitwise logic op.
8272 SDValue LogicOp = Shift->getOperand(0);
8273 if (!LogicOp.hasOneUse())
8274 return SDValue();
8275
8276 unsigned LogicOpcode = LogicOp.getOpcode();
8277 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8279 return SDValue();
8280
8281 // Find a matching one-use shift by constant.
8282 unsigned ShiftOpcode = Shift->getOpcode();
8283 SDValue C1 = Shift->getOperand(1);
8285 assert(C1Node && "Expected a shift with constant operand");
8286 const APInt &C1Val = C1Node->getAPIntValue();
 // Match a one-use shift of the same opcode with a constant amount; on
 // success, captures the shifted operand and a pointer to the amount value.
8287 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8288 const APInt *&ShiftAmtVal) {
8289 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8290 return false;
8291
8292 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8293 if (!ShiftCNode)
8294 return false;
8295
8296 // Capture the shifted operand and shift amount value.
8297 ShiftOp = V.getOperand(0);
8298 ShiftAmtVal = &ShiftCNode->getAPIntValue();
8299
8300 // Shift amount types do not have to match their operand type, so check that
8301 // the constants are the same width.
8302 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8303 return false;
8304
8305 // The fold is not valid if the sum of the shift values exceeds bitwidth.
8306 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8307 return false;
8308
8309 return true;
8310 };
8311
8312 // Logic ops are commutative, so check each operand for a match.
8313 SDValue X, Y;
8314 const APInt *C0Val;
8315 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8316 Y = LogicOp.getOperand(1);
8317 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8318 Y = LogicOp.getOperand(0);
8319 else
8320 return SDValue();
8321
8322 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8323 SDLoc DL(Shift);
8324 EVT VT = Shift->getValueType(0);
8325 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8328 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8329 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8330}
8331
8332/// Handle transforms common to the three shifts, when the shift amount is a
8333/// constant.
8334/// We are looking for: (shift being one of shl/sra/srl)
8335/// shift (binop X, C0), C1
8336/// And want to transform into:
8337/// binop (shift X, C1), (shift C0, C1)
8338SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
 // NOTE(review): this listing elides a guard line between the
 // IsShiftByConstant/IsCopyOrSelect computations and the early returns;
 // code below is kept verbatim, only comments were added.
8339 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8340
8341 // Do not turn a 'not' into a regular xor.
8342 if (isBitwiseNot(N->getOperand(0)))
8343 return SDValue();
8344
8345 // The inner binop must be one-use, since we want to replace it.
8346 SDValue LHS = N->getOperand(0);
8347 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8348 return SDValue();
8349
8350 // TODO: This is limited to early combining because it may reveal regressions
8351 // otherwise. But since we just checked a target hook to see if this is
8352 // desirable, that should have filtered out cases where this interferes
8353 // with some other pattern matching.
8354 if (!LegalTypes)
8355 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8356 return R;
8357
8358 // We want to pull some binops through shifts, so that we have (and (shift))
8359 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
8360 // thing happens with address calculations, so it's important to canonicalize
8361 // it.
8362 switch (LHS.getOpcode()) {
8363 default:
8364 return SDValue();
8365 case ISD::OR:
8366 case ISD::XOR:
8367 case ISD::AND:
8368 break;
8369 case ISD::ADD:
8370 if (N->getOpcode() != ISD::SHL)
8371 return SDValue(); // only shl(add) not sr[al](add).
8372 break;
8373 }
8374
8375 // We require the RHS of the binop to be a constant and not opaque as well.
8376 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8377 if (!BinOpCst)
8378 return SDValue();
8379
8380 // FIXME: disable this unless the input to the binop is a shift by a constant
8381 // or is copy/select. Enable this in other cases when figure out it's exactly
8382 // profitable.
8383 SDValue BinOpLHSVal = LHS.getOperand(0);
8384 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8385 BinOpLHSVal.getOpcode() == ISD::SRA ||
8386 BinOpLHSVal.getOpcode() == ISD::SRL) &&
8387 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8388 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8389 BinOpLHSVal.getOpcode() == ISD::SELECT;
8390
8392 return SDValue();
8393
8394 if (IsCopyOrSelect && N->hasOneUse())
8395 return SDValue();
8396
8397 // Fold the constants, shifting the binop RHS by the shift amount.
8398 SDLoc DL(N);
8399 EVT VT = N->getValueType(0);
8400 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8401 N->getOperand(1));
8402 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8403
8404 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8405 N->getOperand(1));
8406 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8407}
8408
8409SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
 // Push a truncate through a one-use AND with a non-opaque constant operand,
 // truncating both AND inputs instead.
 // NOTE(review): this listing elides the lines that build the truncated
 // operands (Trunc00/Trunc01); code below is kept verbatim.
8410 assert(N->getOpcode() == ISD::TRUNCATE);
8411 assert(N->getOperand(0).getOpcode() == ISD::AND);
8412
8413 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8414 EVT TruncVT = N->getValueType(0);
8415 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8417 SDValue N01 = N->getOperand(0).getOperand(1);
8418 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8419 SDLoc DL(N);
8420 SDValue N00 = N->getOperand(0).getOperand(0);
8423 AddToWorklist(Trunc00.getNode());
8424 AddToWorklist(Trunc01.getNode());
8425 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8426 }
8427 }
8428
8429 return SDValue();
8430}
8431
8432SDValue DAGCombiner::visitRotate(SDNode *N) {
 // Combine step for ISD::ROTL / ISD::ROTR nodes.
 // NOTE(review): this listing elides several original source lines (some
 // conditions and constant extraction); code below is kept verbatim, only
 // comments were added.
8433 SDLoc dl(N);
8434 SDValue N0 = N->getOperand(0);
8435 SDValue N1 = N->getOperand(1);
8436 EVT VT = N->getValueType(0);
8437 unsigned Bitsize = VT.getScalarSizeInBits();
8438
8439 // fold (rot x, 0) -> x
8440 if (isNullOrNullSplat(N1))
8441 return N0;
8442
8443 // fold (rot x, c) -> x iff (c % BitSize) == 0
8444 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
 // For a power-of-two bitsize, (c % Bitsize) == 0 is a mask test against
 // Bitsize - 1.
8445 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8447 return N0;
8448 }
8449
8450 // fold (rot x, c) -> (rot x, c % BitSize)
8451 bool OutOfRange = false;
8453 OutOfRange |= C->getAPIntValue().uge(Bitsize);
8454 return true;
8455 };
8457 EVT AmtVT = N1.getValueType();
8458 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8459 if (SDValue Amt =
8460 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8461 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8462 }
8463
8464 // rot i16 X, 8 --> bswap X
8466 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8467 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8468 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8469
8470 // Simplify the operands using demanded-bits information.
8472 return SDValue(N, 0);
8473
8474 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8475 if (N1.getOpcode() == ISD::TRUNCATE &&
8476 N1.getOperand(0).getOpcode() == ISD::AND) {
8478 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8479 }
8480
8481 unsigned NextOp = N0.getOpcode();
8482 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
8483 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8486 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8487 EVT ShiftVT = C1->getValueType(0);
 // Same rotate direction: amounts add; opposite directions: they cancel,
 // so subtract.
8488 bool SameSide = (N->getOpcode() == NextOp);
8489 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8491 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8495 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8497 }
8498 }
8499 }
8500 return SDValue();
8501}
8502
8503SDValue DAGCombiner::visitSHL(SDNode *N) {
8504 SDValue N0 = N->getOperand(0);
8505 SDValue N1 = N->getOperand(1);
8506 if (SDValue V = DAG.simplifyShift(N0, N1))
8507 return V;
8508
8509 EVT VT = N0.getValueType();
8510 EVT ShiftVT = N1.getValueType();
8511 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8512
8513 // fold (shl c1, c2) -> c1<<c2
8514 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8515 return C;
8516
8517 // fold vector ops
8518 if (VT.isVector()) {
8520 return FoldedVOp;
8521
8523 // If setcc produces all-one true value then:
8524 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8525 if (N1CV && N1CV->isConstant()) {
8526 if (N0.getOpcode() == ISD::AND) {
8527 SDValue N00 = N0->getOperand(0);
8528 SDValue N01 = N0->getOperand(1);
8530
8531 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8532 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8534 if (SDValue C =
8536 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8537 }
8538 }
8539 }
8540 }
8541
8543 return NewSel;
8544
8545 // if (shl x, c) is known to be zero, return 0
8547 return DAG.getConstant(0, SDLoc(N), VT);
8548
8549 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8550 if (N1.getOpcode() == ISD::TRUNCATE &&
8551 N1.getOperand(0).getOpcode() == ISD::AND) {
8553 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8554 }
8555
8557 return SDValue(N, 0);
8558
8559 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8560 if (N0.getOpcode() == ISD::SHL) {
8563 APInt c1 = LHS->getAPIntValue();
8564 APInt c2 = RHS->getAPIntValue();
8565 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8566 return (c1 + c2).uge(OpSizeInBits);
8567 };
8569 return DAG.getConstant(0, SDLoc(N), VT);
8570
8573 APInt c1 = LHS->getAPIntValue();
8574 APInt c2 = RHS->getAPIntValue();
8575 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8576 return (c1 + c2).ult(OpSizeInBits);
8577 };
8579 SDLoc DL(N);
8580 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8581 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8582 }
8583 }
8584
8585 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8586 // For this to be valid, the second form must not preserve any of the bits
8587 // that are shifted out by the inner shift in the first form. This means
8588 // the outer shift size must be >= the number of bits added by the ext.
8589 // As a corollary, we don't care what kind of ext it is.
8590 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8591 N0.getOpcode() == ISD::ANY_EXTEND ||
8592 N0.getOpcode() == ISD::SIGN_EXTEND) &&
8593 N0.getOperand(0).getOpcode() == ISD::SHL) {
8594 SDValue N0Op0 = N0.getOperand(0);
8595 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8596 EVT InnerVT = N0Op0.getValueType();
8597 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8598
8601 APInt c1 = LHS->getAPIntValue();
8602 APInt c2 = RHS->getAPIntValue();
8603 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8604 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8605 (c1 + c2).uge(OpSizeInBits);
8606 };
8608 /*AllowUndefs*/ false,
8609 /*AllowTypeMismatch*/ true))
8610 return DAG.getConstant(0, SDLoc(N), VT);
8611
8614 APInt c1 = LHS->getAPIntValue();
8615 APInt c2 = RHS->getAPIntValue();
8616 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8617 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8618 (c1 + c2).ult(OpSizeInBits);
8619 };
8621 /*AllowUndefs*/ false,
8622 /*AllowTypeMismatch*/ true)) {
8623 SDLoc DL(N);
8624 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8626 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8627 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8628 }
8629 }
8630
8631 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8632 // Only fold this if the inner zext has no other uses to avoid increasing
8633 // the total number of instructions.
8634 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8635 N0.getOperand(0).getOpcode() == ISD::SRL) {
8636 SDValue N0Op0 = N0.getOperand(0);
8637 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8638
8640 APInt c1 = LHS->getAPIntValue();
8641 APInt c2 = RHS->getAPIntValue();
8642 zeroExtendToMatch(c1, c2);
8643 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8644 };
8646 /*AllowUndefs*/ false,
8647 /*AllowTypeMismatch*/ true)) {
8648 SDLoc DL(N);
8649 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8651 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8652 AddToWorklist(NewSHL.getNode());
8653 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8654 }
8655 }
8656
8657 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
8658 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
8659 // TODO - support non-uniform vector shift amounts.
8661 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8662 N0->getFlags().hasExact()) {
8664 uint64_t C1 = N0C1->getZExtValue();
8665 uint64_t C2 = N1C->getZExtValue();
8666 SDLoc DL(N);
8667 if (C1 <= C2)
8668 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8669 DAG.getConstant(C2 - C1, DL, ShiftVT));
8670 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8671 DAG.getConstant(C1 - C2, DL, ShiftVT));
8672 }
8673 }
8674
8675 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
8676 // (and (srl x, (sub c1, c2), MASK)
8677 // Only fold this if the inner shift has no other uses -- if it does, folding
8678 // this will increase the total number of instructions.
8679 // TODO - drop hasOneUse requirement if c1 == c2?
8680 // TODO - support non-uniform vector shift amounts.
8681 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8684 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8685 uint64_t c1 = N0C1->getZExtValue();
8686 uint64_t c2 = N1C->getZExtValue();
8688 SDValue Shift;
8689 if (c2 > c1) {
8690 Mask <<= c2 - c1;
8691 SDLoc DL(N);
8692 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8693 DAG.getConstant(c2 - c1, DL, ShiftVT));
8694 } else {
8695 Mask.lshrInPlace(c1 - c2);
8696 SDLoc DL(N);
8697 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8698 DAG.getConstant(c1 - c2, DL, ShiftVT));
8699 }
8700 SDLoc DL(N0);
8701 return DAG.getNode(ISD::AND, DL, VT, Shift,
8702 DAG.getConstant(Mask, DL, VT));
8703 }
8704 }
8705 }
8706
8707 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8708 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8709 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8710 SDLoc DL(N);
8713 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8714 }
8715
8716 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8717 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8718 // Variant of version done on multiply, except mul by a power of 2 is turned
8719 // into a shift.
8720 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8721 N0.getNode()->hasOneUse() &&
8722 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8723 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8724 TLI.isDesirableToCommuteWithShift(N, Level)) {
8725 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8726 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8727 AddToWorklist(Shl0.getNode());
8728 AddToWorklist(Shl1.getNode());
8729 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8730 }
8731
8732 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8733 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8734 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8735 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8736 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8738 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8739 }
8740
8741 if (N1C && !N1C->isOpaque())
8743 return NewSHL;
8744
8745 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8746 if (N0.getOpcode() == ISD::VSCALE)
8747 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8748 const APInt &C0 = N0.getConstantOperandAPInt(0);
8749 const APInt &C1 = NC1->getAPIntValue();
8750 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8751 }
8752
8753 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8754 APInt ShlVal;
8755 if (N0.getOpcode() == ISD::STEP_VECTOR)
8756 if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8757 const APInt &C0 = N0.getConstantOperandAPInt(0);
8758 if (ShlVal.ult(C0.getBitWidth())) {
8759 APInt NewStep = C0 << ShlVal;
8760 return DAG.getStepVector(SDLoc(N), VT, NewStep);
8761 }
8762 }
8763
8764 return SDValue();
8765}
8766
8767// Transform a right shift of a multiply into a multiply-high.
8768// Examples:
8769// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
8770// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
// NOTE(review): this doc-extraction elides several original lines (the static
// helper's signature, the ShiftAmtSrc/Constant/MulhRightOp/MulhOpcode
// declarations and some guards). Code below is byte-identical to what is
// visible; confirm against the upstream file before changing logic.
8772 const TargetLowering &TLI) {
8773 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8774 "SRL or SRA node is required here!");
8775
8776 // Check the shift amount. Proceed with the transformation if the shift
8777 // amount is constant.
8779 if (!ShiftAmtSrc)
8780 return SDValue();
8781
8782 SDLoc DL(N);
8783
8784 // The operation feeding into the shift must be a multiply.
8785 SDValue ShiftOperand = N->getOperand(0);
8786 if (ShiftOperand.getOpcode() != ISD::MUL)
8787 return SDValue();
8788
8789 // Both operands must be equivalent extend nodes.
8790 SDValue LeftOp = ShiftOperand.getOperand(0);
8791 SDValue RightOp = ShiftOperand.getOperand(1);
8792
8793 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8794 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8795
// Only sign- or zero-extended multiplies can be turned into MULHS/MULHU.
8796 if (!IsSignExt && !IsZeroExt)
8797 return SDValue();
8798
8799 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8800 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8801
// If the multiply RHS is a constant, check that its value fits in the
// narrow type (signed-bit count for sext, active-bit count for zext).
8804 unsigned ActiveBits = IsSignExt
8805 ? Constant->getAPIntValue().getMinSignedBits()
8806 : Constant->getAPIntValue().getActiveBits();
8808 return SDValue();
8810 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
8811 NarrowVT);
8812 } else {
8813 if (LeftOp.getOpcode() != RightOp.getOpcode())
8814 return SDValue();
8815 // Check that the two extend nodes are the same type.
8816 if (NarrowVT != RightOp.getOperand(0).getValueType())
8817 return SDValue();
8818 MulhRightOp = RightOp.getOperand(0);
8819 }
8820
8821 EVT WideVT = LeftOp.getValueType();
8822 // Proceed with the transformation if the wide types match.
8823 assert((WideVT == RightOp.getValueType()) &&
8824 "Cannot have a multiply node with two different operand types.");
8825
8826 // Proceed with the transformation if the wide type is twice as large
8827 // as the narrow type.
8828 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
8829 return SDValue();
8830
8831 // Check the shift amount with the narrow type size.
8832 // Proceed with the transformation if the shift amount is the width
8833 // of the narrow type.
8834 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8835 if (ShiftAmt != NarrowVTSize)
8836 return SDValue();
8837
8838 // If the operation feeding into the MUL is a sign extend (sext),
8839 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8841
8842 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8844 return SDValue();
8845
// The mulh result is narrow; extend it back to the wide result type in a
// way that matches the original shift (sra -> sext, srl -> zext).
8846 SDValue Result =
8847 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
8848 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
8849 : DAG.getZExtOrTrunc(Result, DL, WideVT));
8850}
8851
// visitSRA: DAG-combine entry point for arithmetic right shift (ISD::SRA).
// NOTE(review): this doc-extraction elides a number of original lines (the
// N1C declaration, several if() guards, vector-type computations and helper
// calls). Code below is byte-identical to what is visible; confirm against
// the upstream file before changing logic.
8852SDValue DAGCombiner::visitSRA(SDNode *N) {
8853 SDValue N0 = N->getOperand(0);
8854 SDValue N1 = N->getOperand(1);
8855 if (SDValue V = DAG.simplifyShift(N0, N1))
8856 return V;
8857
8858 EVT VT = N0.getValueType();
8859 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8860
8861 // fold (sra c1, c2) -> (sra c1, c2)
8862 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8863 return C;
8864
8865 // Arithmetic shifting an all-sign-bit value is a no-op.
8866 // fold (sra 0, x) -> 0
8867 // fold (sra -1, x) -> -1
8868 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8869 return N0;
8870
8871 // fold vector ops
8872 if (VT.isVector())
8874 return FoldedVOp;
8875
8877 return NewSel;
8878
8879 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8880 // sext_inreg.
8882 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8883 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8885 if (VT.isVector())
8888 if (!LegalOperations ||
8891 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8892 N0.getOperand(0), DAG.getValueType(ExtVT));
8893 // Even if we can't convert to sext_inreg, we might be able to remove
8894 // this shift pair if the input is already sign extended.
8895 if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8896 return N0.getOperand(0);
8897 }
8898
8899 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8900 // clamp (add c1, c2) to max shift.
8901 if (N0.getOpcode() == ISD::SRA) {
8902 SDLoc DL(N);
8903 EVT ShiftVT = N1.getValueType();
8904 EVT ShiftSVT = ShiftVT.getScalarType();
8906
8908 APInt c1 = LHS->getAPIntValue();
8909 APInt c2 = RHS->getAPIntValue();
8910 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8911 APInt Sum = c1 + c2;
8912 unsigned ShiftSum =
8913 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8914 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8915 return true;
8916 };
8918 SDValue ShiftValue;
8919 if (N1.getOpcode() == ISD::BUILD_VECTOR)
8920 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8921 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
8922 assert(ShiftValues.size() == 1 &&
8923 "Expected matchBinaryPredicate to return one element for "
8924 "SPLAT_VECTORs");
8925 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
8926 } else
8927 ShiftValue = ShiftValues[0];
8928 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8929 }
8930 }
8931
8932 // fold (sra (shl X, m), (sub result_size, n))
8933 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8934 // result_size - n != m.
8935 // If truncate is free for the target sext(shl) is likely to result in better
8936 // code.
8937 if (N0.getOpcode() == ISD::SHL && N1C) {
8938 // Get the two constants of the shifts, CN0 = m, CN = n.
8940 if (N01C) {
8941 LLVMContext &Ctx = *DAG.getContext();
8942 // Determine what the truncate's result bitsize and type would be.
8943 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8944
8945 if (VT.isVector())
8947
8948 // Determine the residual right-shift amount.
8949 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8950
8951 // If the shift is not a no-op (in which case this should be just a sign
8952 // extend already), the truncated-to type is legal, sign_extend is legal
8953 // on that type, and the truncate to that type is both legal and free,
8954 // perform the transform.
8955 if ((ShiftAmt > 0) &&
8958 TLI.isTruncateFree(VT, TruncVT)) {
8959 SDLoc DL(N);
8960 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8962 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8963 N0.getOperand(0), Amt);
8964 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8965 Shift);
8966 return DAG.getNode(ISD::SIGN_EXTEND, DL,
8967 N->getValueType(0), Trunc);
8968 }
8969 }
8970 }
8971
8972 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8973 // sra (add (shl X, N1C), AddC), N1C -->
8974 // sext (add (trunc X to (width - N1C)), AddC')
8975 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8976 N0.getOperand(0).getOpcode() == ISD::SHL &&
8977 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8979 SDValue Shl = N0.getOperand(0);
8980 // Determine what the truncate's type would be and ask the target if that
8981 // is a free operation.
8982 LLVMContext &Ctx = *DAG.getContext();
8983 unsigned ShiftAmt = N1C->getZExtValue();
8984 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8985 if (VT.isVector())
8987
8988 // TODO: The simple type check probably belongs in the default hook
8989 // implementation and/or target-specific overrides (because
8990 // non-simple types likely require masking when legalized), but that
8991 // restriction may conflict with other transforms.
8992 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8993 TLI.isTruncateFree(VT, TruncVT)) {
8994 SDLoc DL(N);
8995 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8996 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8997 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8998 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8999 return DAG.getSExtOrTrunc(Add, DL, VT);
9000 }
9001 }
9002 }
9003
9004 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
9005 if (N1.getOpcode() == ISD::TRUNCATE &&
9006 N1.getOperand(0).getOpcode() == ISD::AND) {
9008 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
9009 }
9010
9011 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
9012 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
9013 // if c1 is equal to the number of bits the trunc removes
9014 // TODO - support non-uniform vector shift amounts.
9015 if (N0.getOpcode() == ISD::TRUNCATE &&
9016 (N0.getOperand(0).getOpcode() == ISD::SRL ||
9017 N0.getOperand(0).getOpcode() == ISD::SRA) &&
9018 N0.getOperand(0).hasOneUse() &&
9019 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
9020 SDValue N0Op0 = N0.getOperand(0);
9021 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
9022 EVT LargeVT = N0Op0.getValueType();
9023 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
9024 if (LargeShift->getAPIntValue() == TruncBits) {
9025 SDLoc DL(N);
9026 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
9028 SDValue SRA =
9029 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
9030 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
9031 }
9032 }
9033 }
9034
9035 // Simplify, based on bits shifted out of the LHS.
9037 return SDValue(N, 0);
9038
9039 // If the sign bit is known to be zero, switch this to a SRL.
9040 if (DAG.SignBitIsZero(N0))
9041 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
9042
9043 if (N1C && !N1C->isOpaque())
9045 return NewSRA;
9046
9047 // Try to transform this shift into a multiply-high if
9048 // it matches the appropriate pattern detected in combineShiftToMULH.
9049 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9050 return MULH;
9051
9052 // Attempt to convert a sra of a load into a narrower sign-extending load.
9054 return NarrowLoad;
9055
9056 return SDValue();
9057}
9058
// visitSRL: DAG-combine entry point for logical right shift (ISD::SRL).
// NOTE(review): this doc-extraction elides a number of original lines (the
// N1C/InnerShift declarations, several matchBinaryPredicate / helper-call
// guards, and some getNode lines). Code below is byte-identical to what is
// visible; confirm against the upstream file before changing logic.
9059SDValue DAGCombiner::visitSRL(SDNode *N) {
9060 SDValue N0 = N->getOperand(0);
9061 SDValue N1 = N->getOperand(1);
9062 if (SDValue V = DAG.simplifyShift(N0, N1))
9063 return V;
9064
9065 EVT VT = N0.getValueType();
9066 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9067
9068 // fold (srl c1, c2) -> c1 >>u c2
9069 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
9070 return C;
9071
9072 // fold vector ops
9073 if (VT.isVector())
9075 return FoldedVOp;
9076
9078 return NewSel;
9079
9080 // if (srl x, c) is known to be zero, return 0
9082 if (N1C &&
9084 return DAG.getConstant(0, SDLoc(N), VT);
9085
9086 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
9087 if (N0.getOpcode() == ISD::SRL) {
9090 APInt c1 = LHS->getAPIntValue();
9091 APInt c2 = RHS->getAPIntValue();
9092 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9093 return (c1 + c2).uge(OpSizeInBits);
9094 };
9096 return DAG.getConstant(0, SDLoc(N), VT);
9097
9100 APInt c1 = LHS->getAPIntValue();
9101 APInt c2 = RHS->getAPIntValue();
9102 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9103 return (c1 + c2).ult(OpSizeInBits);
9104 };
9106 SDLoc DL(N);
9107 EVT ShiftVT = N1.getValueType();
9108 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9109 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
9110 }
9111 }
9112
9113 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
9114 N0.getOperand(0).getOpcode() == ISD::SRL) {
9116 // TODO - support non-uniform vector shift amounts.
9117 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
9118 uint64_t c1 = N001C->getZExtValue();
9119 uint64_t c2 = N1C->getZExtValue();
9120 EVT InnerShiftVT = InnerShift.getValueType();
9121 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
9122 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
9123 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
9124 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
9125 if (c1 + OpSizeInBits == InnerShiftSize) {
9126 SDLoc DL(N);
9127 if (c1 + c2 >= InnerShiftSize)
9128 return DAG.getConstant(0, DL, VT);
9129 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9131 InnerShift.getOperand(0), NewShiftAmt);
9132 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
9133 }
9134 // In the more general case, we can clear the high bits after the shift:
9135 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
9136 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
9137 c1 + c2 < InnerShiftSize) {
9138 SDLoc DL(N);
9139 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9141 InnerShift.getOperand(0), NewShiftAmt);
9143 OpSizeInBits - c2),
9144 DL, InnerShiftVT);
9146 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
9147 }
9148 }
9149 }
9150
9151 // fold (srl (shl x, c), c) -> (and x, cst2)
9152 // TODO - (srl (shl x, c1), c2).
9153 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
9154 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
9155 SDLoc DL(N);
9156 SDValue Mask =
9157 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
9158 AddToWorklist(Mask.getNode());
9159 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
9160 }
9161
9162 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
9163 // TODO - support non-uniform vector shift amounts.
9164 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
9165 // Shifting in all undef bits?
9167 unsigned BitSize = SmallVT.getScalarSizeInBits();
9168 if (N1C->getAPIntValue().uge(BitSize))
9169 return DAG.getUNDEF(VT);
9170
9171 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
9172 uint64_t ShiftAmt = N1C->getZExtValue();
9173 SDLoc DL0(N0);
9175 N0.getOperand(0),
9176 DAG.getConstant(ShiftAmt, DL0,
9178 AddToWorklist(SmallShift.getNode());
9180 SDLoc DL(N);
9181 return DAG.getNode(ISD::AND, DL, VT,
9183 DAG.getConstant(Mask, DL, VT));
9184 }
9185 }
9186
9187 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
9188 // bit, which is unmodified by sra.
9189 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
9190 if (N0.getOpcode() == ISD::SRA)
9191 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
9192 }
9193
9194 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
9195 if (N1C && N0.getOpcode() == ISD::CTLZ &&
9196 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
9197 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
9198
9199 // If any of the input bits are KnownOne, then the input couldn't be all
9200 // zeros, thus the result of the srl will always be zero.
9201 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
9202
9203 // If all of the bits input to the ctlz node are known to be zero, then
9204 // the result of the ctlz is "32" and the result of the shift is one.
9205 APInt UnknownBits = ~Known.Zero;
9206 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
9207
9208 // Otherwise, check to see if there is exactly one bit input to the ctlz.
9209 if (UnknownBits.isPowerOf2()) {
9210 // Okay, we know that only the single bit specified by UnknownBits
9211 // could be set on input to the CTLZ node. If this bit is set, the SRL
9212 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
9213 // to an SRL/XOR pair, which is likely to simplify more.
9214 unsigned ShAmt = UnknownBits.countTrailingZeros();
9215 SDValue Op = N0.getOperand(0);
9216
9217 if (ShAmt) {
9218 SDLoc DL(N0);
9219 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
9220 DAG.getConstant(ShAmt, DL,
9221 getShiftAmountTy(Op.getValueType())));
9222 AddToWorklist(Op.getNode());
9223 }
9224
9225 SDLoc DL(N);
9226 return DAG.getNode(ISD::XOR, DL, VT,
9227 Op, DAG.getConstant(1, DL, VT));
9228 }
9229 }
9230
9231 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
9232 if (N1.getOpcode() == ISD::TRUNCATE &&
9233 N1.getOperand(0).getOpcode() == ISD::AND) {
9235 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
9236 }
9237
9238 // fold operands of srl based on knowledge that the low bits are not
9239 // demanded.
9241 return SDValue(N, 0);
9242
9243 if (N1C && !N1C->isOpaque())
9245 return NewSRL;
9246
9247 // Attempt to convert a srl of a load into a narrower zero-extending load.
9249 return NarrowLoad;
9250
9251 // Here is a common situation. We want to optimize:
9252 //
9253 // %a = ...
9254 // %b = and i32 %a, 2
9255 // %c = srl i32 %b, 1
9256 // brcond i32 %c ...
9257 //
9258 // into
9259 //
9260 // %a = ...
9261 // %b = and %a, 2
9262 // %c = setcc eq %b, 0
9263 // brcond %c ...
9264 //
9265 // However, after the source operand of SRL is optimized into AND, the SRL
9266 // itself may not be optimized further. Look for it and add the BRCOND into
9267 // the worklist.
9268 if (N->hasOneUse()) {
9269 SDNode *Use = *N->use_begin();
9270 if (Use->getOpcode() == ISD::BRCOND)
9271 AddToWorklist(Use);
9272 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
9273 // Also look past the truncate.
9274 Use = *Use->use_begin();
9275 if (Use->getOpcode() == ISD::BRCOND)
9276 AddToWorklist(Use);
9277 }
9278 }
9279
9280 // Try to transform this shift into a multiply-high if
9281 // it matches the appropriate pattern detected in combineShiftToMULH.
9282 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9283 return MULH;
9284
9285 return SDValue();
9286}
9287
// visitFunnelShift: DAG-combine entry point for ISD::FSHL / ISD::FSHR.
// NOTE(review): this doc-extraction elides several original lines (RotAmt /
// PtrOff / ModuloBits / NewPtr declarations and some guards). Code below is
// byte-identical to what is visible; confirm upstream before changing logic.
9288SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
9289 EVT VT = N->getValueType(0);
9290 SDValue N0 = N->getOperand(0);
9291 SDValue N1 = N->getOperand(1);
9292 SDValue N2 = N->getOperand(2);
9293 bool IsFSHL = N->getOpcode() == ISD::FSHL;
9294 unsigned BitWidth = VT.getScalarSizeInBits();
9295
9296 // fold (fshl N0, N1, 0) -> N0
9297 // fold (fshr N0, N1, 0) -> N1
9299 if (DAG.MaskedValueIsZero(
9300 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9301 return IsFSHL ? N0 : N1;
9302
9303 auto IsUndefOrZero = [](SDValue V) {
9304 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9305 };
9306
9307 // TODO - support non-uniform vector shift amounts.
9308 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9309 EVT ShAmtTy = N2.getValueType();
9310
9311 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
9312 if (Cst->getAPIntValue().uge(BitWidth)) {
9314 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9315 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9316 }
9317
9318 unsigned ShAmt = Cst->getZExtValue();
9319 if (ShAmt == 0)
9320 return IsFSHL ? N0 : N1;
9321
9322 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9323 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9324 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9325 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
9326 if (IsUndefOrZero(N0))
9327 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9328 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9329 SDLoc(N), ShAmtTy));
9330 if (IsUndefOrZero(N1))
9331 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9332 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9333 SDLoc(N), ShAmtTy));
9334
9335 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9336 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9337 // TODO - bigendian support once we have test coverage.
9338 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9339 // TODO - permit LHS EXTLOAD if extensions are shifted out.
9340 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9341 !DAG.getDataLayout().isBigEndian()) {
9342 auto *LHS = dyn_cast<LoadSDNode>(N0);
9343 auto *RHS = dyn_cast<LoadSDNode>(N1);
9344 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9345 LHS->getAddressSpace() == RHS->getAddressSpace() &&
9346 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9347 ISD::isNON_EXTLoad(LHS)) {
9348 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9349 SDLoc DL(RHS);
9351 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9352 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9353 bool Fast = false;
9354 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9355 RHS->getAddressSpace(), NewAlign,
9356 RHS->getMemOperand()->getFlags(), &Fast) &&
9357 Fast) {
9359 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9360 AddToWorklist(NewPtr.getNode());
9361 SDValue Load = DAG.getLoad(
9362 VT, DL, RHS->getChain(), NewPtr,
9363 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9364 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9365 // Replace the old load's chain with the new load's chain.
9366 WorklistRemover DeadNodes(*this);
9367 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9368 return Load;
9369 }
9370 }
9371 }
9372 }
9373 }
9374
9375 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9376 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9377 // iff we know the shift amount is in range.
9378 // TODO: when is it worth doing SUB(BW, N2) as well?
9379 if (isPowerOf2_32(BitWidth)) {
9381 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9382 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9383 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9384 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9385 }
9386
9387 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9388 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9389 // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
9390 // is legal as well we might be better off avoiding non-constant (BW - N2).
9391 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9392 if (N0 == N1 && hasOperation(RotOpc, VT))
9393 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9394
9395 // Simplify, based on bits shifted out of N0/N1.
9397 return SDValue(N, 0);
9398
9399 return SDValue();
9400}
9401
9402SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
9403 SDValue N0 = N->getOperand(0);
9404 SDValue N1 = N->getOperand(1);
9405 if (SDValue V = DAG.simplifyShift(N0, N1))
9406 return V;
9407
9408 EVT VT = N0.getValueType();
9409
9410 // fold (*shlsat c1, c2) -> c1<<c2
9411 if (SDValue C =
9412 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
9413 return C;
9414
9415 return SDValue();
9416}
9417
9418// Given an ABS node, detect the following pattern:
9419// (ABS (SUB (EXTEND a), (EXTEND b))).
9420// Generates UABD/SABD instruction.
// NOTE(review): the first line of this static helper's signature (original
// line 9421) was elided by the doc-extraction; code below is byte-identical
// to what is visible.
9422 const TargetLowering &TLI) {
9423 SDValue AbsOp1 = N->getOperand(0);
9424 SDValue Op0, Op1;
9425
9426 if (AbsOp1.getOpcode() != ISD::SUB)
9427 return SDValue();
9428
9429 Op0 = AbsOp1.getOperand(0);
9430 Op1 = AbsOp1.getOperand(1);
9431
9432 unsigned Opc0 = Op0.getOpcode();
9433 // Check if the operands of the sub are (zero|sign)-extended.
9434 if (Opc0 != Op1.getOpcode() ||
9435 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9436 return SDValue();
9437
9438 EVT VT1 = Op0.getOperand(0).getValueType();
9439 EVT VT2 = Op1.getOperand(0).getValueType();
9440 // Check if the operands are of same type and valid size.
9441 unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9442 if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9443 return SDValue();
9444
// Emit the absolute-difference on the narrow operands, then zero-extend
// back to the ABS node's result type.
9445 Op0 = Op0.getOperand(0);
9446 Op1 = Op1.getOperand(0);
9447 SDValue ABD =
9448 DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9449 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9450}
9451
// visitABS: DAG-combine entry point for ISD::ABS.
9452SDValue DAGCombiner::visitABS(SDNode *N) {
9453 SDValue N0 = N->getOperand(0);
9454 EVT VT = N->getValueType(0);
9455
9456 // fold (abs c1) -> c2
// NOTE(review): the constant-operand guard on the elided original line 9457
// (presumably an isConstantIntBuildVectorOrConstantInt check) is missing
// from this extraction — confirm against the upstream file.
9458 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9459 // fold (abs (abs x)) -> (abs x)
9460 if (N0.getOpcode() == ISD::ABS)
9461 return N0;
9462 // fold (abs x) -> x iff not-negative
9463 if (DAG.SignBitIsZero(N0))
9464 return N0;
9465
// Try to match (abs (sub (extend a), (extend b))) -> extended ABDS/ABDU.
9466 if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9467 return ABD;
9468
9469 return SDValue();
9470}
9471
// visitBSWAP: DAG-combine entry point for ISD::BSWAP.
9472SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9473 SDValue N0 = N->getOperand(0);
9474 EVT VT = N->getValueType(0);
9475
9476 // fold (bswap c1) -> c2
// (constant-operand guard on elided original line 9477 — confirm upstream)
9478 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9479 // fold (bswap (bswap x)) -> x
9480 if (N0.getOpcode() == ISD::BSWAP)
9481 return N0->getOperand(0);
9482
9483 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
9484 // isn't supported, it will be expanded to bswap followed by a manual reversal
9485 // of bits in each byte. By placing bswaps before bitreverse, we can remove
9486 // the two bswaps if the bitreverse gets expanded.
9487 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
9488 SDLoc DL(N);
9489 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
9490 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
9491 }
9492
9493 return SDValue();
9494}
9495
// visitBITREVERSE: DAG-combine entry point for ISD::BITREVERSE.
9496SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9497 SDValue N0 = N->getOperand(0);
9498 EVT VT = N->getValueType(0);
9499
9500 // fold (bitreverse c1) -> c2
// (constant-operand guard on elided original line 9501 — confirm upstream)
9502 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9503 // fold (bitreverse (bitreverse x)) -> x
9504 if (N0.getOpcode() == ISD::BITREVERSE)
9505 return N0.getOperand(0);
9506 return SDValue();
9507}
9508
// visitCTLZ: DAG-combine entry point for ISD::CTLZ.
9509SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9510 SDValue N0 = N->getOperand(0);
9511 EVT VT = N->getValueType(0);
9512
9513 // fold (ctlz c1) -> c2
// (constant-operand guard on elided original line 9514 — confirm upstream)
9515 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9516
9517 // If the value is known never to be zero, switch to the undef version.
9518 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9519 if (DAG.isKnownNeverZero(N0))
9520 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9521 }
9522
9523 return SDValue();
9524}
9525
// visitCTLZ_ZERO_UNDEF: DAG-combine entry point for ISD::CTLZ_ZERO_UNDEF.
9526SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9527 SDValue N0 = N->getOperand(0);
9528 EVT VT = N->getValueType(0);
9529
9530 // fold (ctlz_zero_undef c1) -> c2
// (constant-operand guard on elided original line 9531 — confirm upstream)
9532 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9533 return SDValue();
9534}
9535
// visitCTTZ: DAG-combine entry point for ISD::CTTZ.
9536SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9537 SDValue N0 = N->getOperand(0);
9538 EVT VT = N->getValueType(0);
9539
9540 // fold (cttz c1) -> c2
// (constant-operand guard on elided original line 9541 — confirm upstream)
9542 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9543
9544 // If the value is known never to be zero, switch to the undef version.
9545 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9546 if (DAG.isKnownNeverZero(N0))
9547 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9548 }
9549
9550 return SDValue();
9551}
9552
// visitCTTZ_ZERO_UNDEF: DAG-combine entry point for ISD::CTTZ_ZERO_UNDEF.
9553SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9554 SDValue N0 = N->getOperand(0);
9555 EVT VT = N->getValueType(0);
9556
9557 // fold (cttz_zero_undef c1) -> c2
// (constant-operand guard on elided original line 9558 — confirm upstream)
9559 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9560 return SDValue();
9561}
9562
// visitCTPOP: DAG-combine entry point for ISD::CTPOP.
9563SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9564 SDValue N0 = N->getOperand(0);
9565 EVT VT = N->getValueType(0);
9566
9567 // fold (ctpop c1) -> c2
// (constant-operand guard on elided original line 9568 — confirm upstream)
9569 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9570 return SDValue();
9571}
9572
9573// FIXME: This should be checking for no signed zeros on individual operands, as
9574// well as no nans.
// NOTE(review): the first line of this static helper's signature (original
// line 9575) and an additional condition inside the return expression
// (original line 9582) were elided by the doc-extraction. Forming fmin/fmax
// from a select is only legal here when signed zeros don't matter and both
// operands are known non-NaN.
9576 SDValue RHS,
9577 const TargetLowering &TLI) {
9578 const TargetOptions &Options = DAG.getTarget().Options;
9579 EVT VT = LHS.getValueType();
9580
9581 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9583 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9584}
9585
9586/// Generate Min/Max node
// NOTE(review): the signature line (original line 9587) and a few interior
// lines (the TransformVT computation and the IEEE-opcode legality guards on
// lines 9594, 9606 and 9621) were elided by the doc-extraction; code below
// is byte-identical to what is visible.
9588 SDValue RHS, SDValue True, SDValue False,
9589 ISD::CondCode CC, const TargetLowering &TLI,
9590 SelectionDAG &DAG) {
// Only handle selects that implement min/max directly, i.e. the compared
// values are exactly the two select arms (in either order).
9591 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9592 return SDValue();
9593
9595 switch (CC) {
9596 case ISD::SETOLT:
9597 case ISD::SETOLE:
9598 case ISD::SETLT:
9599 case ISD::SETLE:
9600 case ISD::SETULT:
9601 case ISD::SETULE: {
9602 // Since it's known never nan to get here already, either fminnum or
9603 // fminnum_ieee are OK. Try the ieee version first, since fminnum is
9604 // expanded in terms of it.
9605 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9607 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9608
9609 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9610 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9611 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9612 return SDValue();
9613 }
9614 case ISD::SETOGT:
9615 case ISD::SETOGE:
9616 case ISD::SETGT:
9617 case ISD::SETGE:
9618 case ISD::SETUGT:
9619 case ISD::SETUGE: {
9620 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9622 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9623
9624 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9625 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9626 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9627 return SDValue();
9628 }
9629 default:
9630 return SDValue();
9631 }
9632}
9633
9634/// If a (v)select has a condition value that is a sign-bit test, try to smear
9635/// the condition operand sign-bit across the value width and use it as a mask.
// Requires the select's source (X) to match the result type so the SRA of X
// by BW-1 produces an all-ones/all-zeros mask directly usable with OR/AND.
// NOTE(review): the signature line (orig. 9636), the constant-operand guard
// (9640), and the two pattern-match "if" headers (9653-9654 and 9661) are
// missing from this scraped listing — the two braced regions below are the
// bodies of those dropped conditionals.
9637  SDValue Cond = N->getOperand(0);
9638  SDValue C1 = N->getOperand(1);
9639  SDValue C2 = N->getOperand(2);
9641    return SDValue();
9642
9643  EVT VT = N->getValueType(0);
9644  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9645      VT != Cond.getOperand(0).getValueType())
9646    return SDValue();
9647
9648  // The inverted-condition + commuted-select variants of these patterns are
9649  // canonicalized to these forms in IR.
9650  SDValue X = Cond.getOperand(0);
9651  SDValue CondC = Cond.getOperand(1);
9652  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9655    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9656    SDLoc DL(N);
9657    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9658    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9659    return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9660  }
9662    // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9663    SDLoc DL(N);
9664    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9665    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9666    return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9667  }
9668  return SDValue();
9669}
9670
// Fold (select Cond, C1, C2) where both arms are integer constants into
// cheaper zext/sext/add/shl/xor forms, subject to target preferences.
// Returns the replacement value or an empty SDValue if no fold applies.
// NOTE(review): several interior lines are missing from this scraped
// listing (orig. 9682, 9694, 9696, 9701, 9703, 9747, 9765, 9767, 9769) —
// including the dyn_cast that defines C1 and the NotCond constructions.
9671SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9672  SDValue Cond = N->getOperand(0);
9673  SDValue N1 = N->getOperand(1);
9674  SDValue N2 = N->getOperand(2);
9675  EVT VT = N->getValueType(0);
9676  EVT CondVT = Cond.getValueType();
9677  SDLoc DL(N);
9678
// Only integer selects-of-constants are handled here.
9679  if (!VT.isInteger())
9680    return SDValue();
9681
9683  auto *C2 = dyn_cast<ConstantSDNode>(N2);
9684  if (!C1 || !C2)
9685    return SDValue();
9686
9687  // Only do this before legalization to avoid conflicting with target-specific
9688  // transforms in the other direction (create a select from a zext/sext). There
9689  // is also a target-independent combine here in DAGCombiner in the other
9690  // direction for (select Cond, -1, 0) when the condition is not i1.
9691  if (CondVT == MVT::i1 && !LegalOperations) {
9692    if (C1->isZero() && C2->isOne()) {
9693      // select Cond, 0, 1 --> zext (!Cond)
9695      if (VT != MVT::i1)
9697      return NotCond;
9698    }
9699    if (C1->isZero() && C2->isAllOnes()) {
9700      // select Cond, 0, -1 --> sext (!Cond)
9702      if (VT != MVT::i1)
9704      return NotCond;
9705    }
9706    if (C1->isOne() && C2->isZero()) {
9707      // select Cond, 1, 0 --> zext (Cond)
9708      if (VT != MVT::i1)
9709        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9710      return Cond;
9711    }
9712    if (C1->isAllOnes() && C2->isZero()) {
9713      // select Cond, -1, 0 --> sext (Cond)
9714      if (VT != MVT::i1)
9715        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9716      return Cond;
9717    }
9718
9719    // Use a target hook because some targets may prefer to transform in the
9720    // other direction.
9721    if (TLI.convertSelectOfConstantsToMath(VT)) {
9722      // For any constants that differ by 1, we can transform the select into an
9723      // extend and add.
9724      const APInt &C1Val = C1->getAPIntValue();
9725      const APInt &C2Val = C2->getAPIntValue();
9726      if (C1Val - 1 == C2Val) {
9727        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9728        if (VT != MVT::i1)
9729          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9730        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9731      }
9732      if (C1Val + 1 == C2Val) {
9733        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9734        if (VT != MVT::i1)
9735          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9736        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9737      }
9738
9739      // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9740      if (C1Val.isPowerOf2() && C2Val.isZero()) {
9741        if (VT != MVT::i1)
9742          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9743        SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9744        return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9745      }
9746
9748        return V;
9749    }
9750
9751    return SDValue();
9752  }
9753
9754  // fold (select Cond, 0, 1) -> (xor Cond, 1)
9755  // We can't do this reliably if integer based booleans have different contents
9756  // to floating point based booleans. This is because we can't tell whether we
9757  // have an integer-based boolean or a floating-point-based boolean unless we
9758  // can find the SETCC that produced it and inspect its operands. This is
9759  // fairly easy if C is the SETCC node, but it can potentially be
9760  // undiscoverable (or not reasonably discoverable). For example, it could be
9761  // in another basic block or it could require searching a complicated
9762  // expression.
9763  if (CondVT.isInteger() &&
9764      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9766      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9768      C1->isZero() && C2->isOne()) {
9770        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9771    if (VT.bitsEq(CondVT))
9772      return NotCond;
9773    return DAG.getZExtOrTrunc(NotCond, DL, VT);
9774  }
9775
9776  return SDValue();
9777}
9778
// Fold (v)selects whose result type equals the i1(-vector) condition type
// into plain AND/OR logic. All four patterns below are exact rewrites for
// 1-bit values.
// NOTE(review): the function's signature line (orig. 9779) is missing from
// this scraped listing.
9780  assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9781         "Expected a (v)select");
9782  SDValue Cond = N->getOperand(0);
9783  SDValue T = N->getOperand(1), F = N->getOperand(2);
9784  EVT VT = N->getValueType(0);
// Only valid when the selected values are themselves 1-bit booleans of the
// same type as the condition.
9785  if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9786    return SDValue();
9787
9788  // select Cond, Cond, F --> or Cond, F
9789  // select Cond, 1, F --> or Cond, F
9790  if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9791    return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9792
9793  // select Cond, T, Cond --> and Cond, T
9794  // select Cond, T, 0 --> and Cond, T
9795  if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9796    return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9797
9798  // select Cond, T, 1 --> or (not Cond), T
9799  if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9800    SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9801    return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9802  }
9803
9804  // select Cond, 0, F --> and (not Cond), F
9805  if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9806    SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9807    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9808  }
9809
9810  return SDValue();
9811}
9812
// Fold a (v)select whose condition is a sign-bit test (Cond0 s< 0 or its
// inverse Cond0 s> -1) into an arithmetic-shift-produced mask combined with
// AND/OR/ANDN. Presumably the dropped signature names this
// foldVSelectToSignBitSplatMask — confirm against the original file.
// NOTE(review): the signature line (orig. 9813), the CondCode extraction
// (9823), and the "-1 arm" pattern header (9845) are missing from this
// scraped listing.
9814  SDValue N0 = N->getOperand(0);
9815  SDValue N1 = N->getOperand(1);
9816  SDValue N2 = N->getOperand(2);
9817  EVT VT = N->getValueType(0);
9818  if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
9819    return SDValue();
9820
9821  SDValue Cond0 = N0.getOperand(0);
9822  SDValue Cond1 = N0.getOperand(1);
// The SRA trick only works when the compared value has the select's type.
9824  if (VT != Cond0.getValueType())
9825    return SDValue();
9826
9827  // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
9828  // compare is inverted from that pattern ("Cond0 s> -1").
9829  if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
9830    ; // This is the pattern we are looking for.
9831  else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
9832    std::swap(N1, N2);
9833  else
9834    return SDValue();
9835
9836  // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
9837  if (isNullOrNullSplat(N2)) {
9838    SDLoc DL(N);
9839    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9840    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9841    return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
9842  }
9843
9844  // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
9846    SDLoc DL(N);
9847    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9848    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9849    return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
9850  }
9851
9852  // If we have to invert the sign bit mask, only do that transform if the
9853  // target has a bitwise 'and not' instruction (the invert is free).
9854  // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
9855  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9856  if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
9857    SDLoc DL(N);
9858    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9859    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9860    SDValue Not = DAG.getNOT(DL, Sra, VT);
9861    return DAG.getNode(ISD::AND, DL, VT, Not, N2);
9862  }
9863
9864  // TODO: There's another pattern in this family, but it may require
9865  // implementing hasOrNot() to check for profitability:
9866  // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
9867
9868  return SDValue();
9869}
9870
// Main combine entry point for ISD::SELECT nodes. Tries, in order:
// generic simplification, constant-arm folds, boolean-logic folds,
// operand-based simplification, i1 select-chain (de)normalization,
// condition flipping, setcc-based folds (min/max, uaddo saturation,
// SELECT_CC formation), and select-of-binops.
// NOTE(review): this scraped listing is missing interior lines (orig. 9883,
// 9903, 9909, 9981, 9988, 10001, 10023, 10027) — among them the
// foldSelectOfConstants call, the normalizeToSequence initializer RHS, the
// CC extraction, and the SELECT_CC construction header.
9871SDValue DAGCombiner::visitSELECT(SDNode *N) {
9872  SDValue N0 = N->getOperand(0);
9873  SDValue N1 = N->getOperand(1);
9874  SDValue N2 = N->getOperand(2);
9875  EVT VT = N->getValueType(0);
9876  EVT VT0 = N0.getValueType();
9877  SDLoc DL(N);
9878  SDNodeFlags Flags = N->getFlags();
9879
9880  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9881    return V;
9882
9884    return V;
9885
9886  if (SDValue V = foldBoolSelectToLogic(N, DAG))
9887    return V;
9888
9889  // If we can fold this based on the true/false value, do so.
9890  if (SimplifySelectOps(N, N1, N2))
9891    return SDValue(N, 0);  // Don't revisit N.
9892
9893  if (VT0 == MVT::i1) {
9894    // The code in this block deals with the following 2 equivalences:
9895    // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9896    // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9897    // The target can specify its preferred form with the
9898    // shouldNormalizeToSelectSequence() callback. However we always transform
9899    // to the right anyway if we find the inner select exists in the DAG anyway
9900    // and we always transform to the left side if we know that we can further
9901    // optimize the combination of the conditions.
9902    bool normalizeToSequence =
9904    // select (and Cond0, Cond1), X, Y
9905    //   -> select Cond0, (select Cond1, X, Y), Y
9906    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9907      SDValue Cond0 = N0->getOperand(0);
9908      SDValue Cond1 = N0->getOperand(1);
9910          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9911      if (normalizeToSequence || !InnerSelect.use_empty())
9912        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9913                           InnerSelect, N2, Flags);
9914      // Cleanup on failure.
9915      if (InnerSelect.use_empty())
9916        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9917    }
9918    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9919    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9920      SDValue Cond0 = N0->getOperand(0);
9921      SDValue Cond1 = N0->getOperand(1);
9922      SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9923                                        Cond1, N1, N2, Flags);
9924      if (normalizeToSequence || !InnerSelect.use_empty())
9925        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9926                           InnerSelect, Flags);
9927      // Cleanup on failure.
9928      if (InnerSelect.use_empty())
9929        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9930    }
9931
9932    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9933    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9934      SDValue N1_0 = N1->getOperand(0);
9935      SDValue N1_1 = N1->getOperand(1);
9936      SDValue N1_2 = N1->getOperand(2);
9937      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9938        // Create the actual and node if we can generate good code for it.
9939        if (!normalizeToSequence) {
9940          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9941          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9942                             N2, Flags);
9943        }
9944        // Otherwise see if we can optimize the "and" to a better pattern.
9945        if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9946          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9947                             N2, Flags);
9948        }
9949      }
9950    }
9951    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9952    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9953      SDValue N2_0 = N2->getOperand(0);
9954      SDValue N2_1 = N2->getOperand(1);
9955      SDValue N2_2 = N2->getOperand(2);
9956      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9957        // Create the actual or node if we can generate good code for it.
9958        if (!normalizeToSequence) {
9959          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9960          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9961                             N2_2, Flags);
9962        }
9963        // Otherwise see if we can optimize to a better pattern.
9964        if (SDValue Combined = visitORLike(N0, N2_0, N))
9965          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9966                             N2_2, Flags);
9967      }
9968    }
9969  }
9970
9971  // select (not Cond), N1, N2 -> select Cond, N2, N1
9972  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9973    SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9974    SelectOp->setFlags(Flags);
9975    return SelectOp;
9976  }
9977
9978  // Fold selects based on a setcc into other things, such as min/max/abs.
9979  if (N0.getOpcode() == ISD::SETCC) {
9980    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9982
9983    // select (fcmp lt x, y), x, y -> fminnum x, y
9984    // select (fcmp gt x, y), x, y -> fmaxnum x, y
9985    //
9986    // This is OK if we don't care what happens if either operand is a NaN.
9987    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9989                                                CC, TLI, DAG))
9990        return FMinMax;
9991
9992    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9993    // This is conservatively limited to pre-legal-operations to give targets
9994    // a chance to reverse the transform if they want to do that. Also, it is
9995    // unlikely that the pattern would be formed late, so it's probably not
9996    // worth going through the other checks.
9997    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9998        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9999        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
10000      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
10002      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
10003        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
10004        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
10005        //
10006        // The IR equivalent of this transform would have this form:
10007        //   %a = add %x, C
10008        //   %c = icmp ugt %x, ~C
10009        //   %r = select %c, -1, %a
10010        //   =>
10011        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
10012        //   %u0 = extractvalue %u, 0
10013        //   %u1 = extractvalue %u, 1
10014        //   %r = select %u1, -1, %u0
10015        SDVTList VTs = DAG.getVTList(VT, VT0);
10016        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
10017        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
10018      }
10019    }
10020
10021    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
10022        (!LegalOperations &&
10024      // Any flags available in a select/setcc fold will be on the setcc as they
10025      // migrated from fcmp
10026      Flags = N0.getNode()->getFlags();
10028                                       N2, N0.getOperand(2));
10029      SelectNode->setFlags(Flags);
10030      return SelectNode;
10031    }
10032
10033    if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
10034      return NewSel;
10035  }
10036
10037  if (!VT.isVector())
10038    if (SDValue BinOp = foldSelectOfBinops(N))
10039      return BinOp;
10040
10041  return SDValue();
10042}
10043
10044// This function assumes all the vselect's arguments are CONCAT_VECTOR
10045// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
// If each half of the selector is uniform (all the same constant node,
// ignoring undefs), select the matching concat operand per half and rebuild
// as a single CONCAT_VECTORS.
// NOTE(review): the signature line (orig. 10046), an assert header (10090),
// and the getNode opcode/type arguments line (10094) are missing from this
// scraped listing.
10047  SDLoc DL(N);
10048  SDValue Cond = N->getOperand(0);
10049  SDValue LHS = N->getOperand(1);
10050  SDValue RHS = N->getOperand(2);
10051  EVT VT = N->getValueType(0);
10052  int NumElems = VT.getVectorNumElements();
10053  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
10054         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
10055         Cond.getOpcode() == ISD::BUILD_VECTOR);
10056
10057  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
10058  // binary ones here.
10059  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
10060    return SDValue();
10061
10062  // We're sure we have an even number of elements due to the
10063  // concat_vectors we have as arguments to vselect.
10064  // Skip BV elements until we find one that's not an UNDEF
10065  // After we find an UNDEF element, keep looping until we get to half the
10066  // length of the BV and see if all the non-undef nodes are the same.
10067  ConstantSDNode *BottomHalf = nullptr;
10068  for (int i = 0; i < NumElems / 2; ++i) {
10069    if (Cond->getOperand(i)->isUndef())
10070      continue;
10071
10072    if (BottomHalf == nullptr)
10073      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10074    else if (Cond->getOperand(i).getNode() != BottomHalf)
10075      return SDValue();
10076  }
10077
10078  // Do the same for the second half of the BuildVector
10079  ConstantSDNode *TopHalf = nullptr;
10080  for (int i = NumElems / 2; i < NumElems; ++i) {
10081    if (Cond->getOperand(i)->isUndef())
10082      continue;
10083
10084    if (TopHalf == nullptr)
10085      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10086    else if (Cond->getOperand(i).getNode() != TopHalf)
10087      return SDValue();
10088  }
10089
10091         "One half of the selector was all UNDEFs and the other was all the "
10092         "same value. This should have been addressed before this function.");
10093  return DAG.getNode(
10095      BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
10096      TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
10097}
10098
10099bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
10100 if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
10101 return false;
10102
10103 // For now we check only the LHS of the add.
10104 SDValue LHS = Index.getOperand(0);
10105 SDValue SplatVal = DAG.getSplatValue(LHS);
10106 if (!SplatVal)
10107 return false;
10108
10109 BasePtr = SplatVal;
10110 Index = Index.getOperand(1);
10111 return true;
10112}
10113
10114// Fold sext/zext of index into index type.
// If the gather/scatter index is a ZERO_EXTEND or SIGN_EXTEND and the target
// says the extend is redundant for this index type, strip it and update
// Index in place. Returns true if Index was changed.
// NOTE(review): the signature line (orig. 10115) and the guard lines 10121
// and 10130 are missing from this scraped listing — presumably those guards
// also update the node's signed/unsigned index semantics; confirm against
// the original file.
10116                     bool Scaled, SelectionDAG &DAG) {
10117  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10118
10119  if (Index.getOpcode() == ISD::ZERO_EXTEND) {
10120    SDValue Op = Index.getOperand(0);
10122    if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10123      Index = Op;
10124      return true;
10125    }
10126  }
10127
10128  if (Index.getOpcode() == ISD::SIGN_EXTEND) {
10129    SDValue Op = Index.getOperand(0);
10131    if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10132      Index = Op;
10133      return true;
10134    }
10135  }
10136
10137  return false;
10138}
10139
// Combine for masked scatter nodes: drop dead (zero-mask) scatters, then
// try refining the uniform base pointer and the index type, rebuilding the
// scatter with the updated operands.
// NOTE(review): the MaskedScatterSDNode cast (orig. 10141) and the zero-mask
// check line (10151) are missing from this scraped listing.
10140SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
10142  SDValue Mask = MSC->getMask();
10143  SDValue Chain = MSC->getChain();
10144  SDValue Index = MSC->getIndex();
10145  SDValue Scale = MSC->getScale();
10146  SDValue StoreVal = MSC->getValue();
10147  SDValue BasePtr = MSC->getBasePtr();
10148  SDLoc DL(N);
10149
10150  // Zap scatters with a zero mask.
10152    return Chain;
10153
// refineUniformBase mutates BasePtr/Index on success, so rebuild the node.
10154  if (refineUniformBase(BasePtr, Index, DAG)) {
10155    SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10156    return DAG.getMaskedScatter(
10157        DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10158        MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10159  }
10160
// Likewise for a refined (extend-stripped) index.
10161  if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
10162    SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10163    return DAG.getMaskedScatter(
10164        DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10165        MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10166  }
10167
10168  return SDValue();
10169}
10170
// Combine for masked store nodes: drop zero-mask stores, convert all-ones
// masks to ordinary stores, try indexed-store formation, narrow truncating
// stores via SimplifyDemandedBits, and fold trunc+masked-store into a
// masked truncating store.
// NOTE(review): several lines are missing from this scraped listing (orig.
// 10172 MaskedStoreSDNode cast, 10180 zero-mask check, 10189, 10193, 10198,
// 10200, 10202, 10206) — including the SimplifyDemandedBits call itself.
10171SDValue DAGCombiner::visitMSTORE(SDNode *N) {
10173  SDValue Mask = MST->getMask();
10174  SDValue Chain = MST->getChain();
10175  SDValue Value = MST->getValue();
10176  SDValue Ptr = MST->getBasePtr();
10177  SDLoc DL(N);
10178
10179  // Zap masked stores with a zero mask.
10181    return Chain;
10182
10183  // If this is a masked load with an all ones mask, we can use a unmasked load.
10184  // FIXME: Can we do this for indexed, compressing, or truncating stores?
10185  if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
10186      !MST->isCompressingStore() && !MST->isTruncatingStore())
10187    return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
10188                        MST->getBasePtr(), MST->getPointerInfo(),
10190                        MST->getAAInfo());
10191
10192  // Try transforming N to an indexed store.
10194    return SDValue(N, 0);
10195
10196  if (MST->isTruncatingStore() && MST->isUnindexed() &&
10197      Value.getValueType().isInteger() &&
10199       !cast<ConstantSDNode>(Value)->isOpaque())) {
10201        APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
10203
10204    // See if we can simplify the operation with
10205    // SimplifyDemandedBits, which only works if the value has a single use.
10207      // Re-visit the store if anything changed and the store hasn't been merged
10208      // with another node (N is deleted) SimplifyDemandedBits will add Value's
10209      // node back to the worklist if necessary, but we also need to re-visit
10210      // the Store node itself.
10211      if (N->getOpcode() != ISD::DELETED_NODE)
10212        AddToWorklist(N);
10213      return SDValue(N, 0);
10214    }
10215  }
10216
10217  // If this is a TRUNC followed by a masked store, fold this into a masked
10218  // truncating store.  We can do this even if this is already a masked
10219  // truncstore.
10220  if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() &&
10221      MST->isUnindexed() &&
10222      TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
10223                               MST->getMemoryVT(), LegalOperations)) {
10224    auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
10225                                         Value.getOperand(0).getValueType());
10226    return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
10227                              MST->getOffset(), Mask, MST->getMemoryVT(),
10228                              MST->getMemOperand(), MST->getAddressingMode(),
10229                              /*IsTruncating=*/true);
10230  }
10231
10232  return SDValue();
10233}
10234
// Combine for masked gather nodes: replace zero-mask gathers with their
// pass-through value, then try refining the uniform base pointer and the
// index type, rebuilding the gather with the updated operands.
// NOTE(review): the MaskedGatherSDNode cast (orig. 10236) and the zero-mask
// check line (10246) are missing from this scraped listing.
10235SDValue DAGCombiner::visitMGATHER(SDNode *N) {
10237  SDValue Mask = MGT->getMask();
10238  SDValue Chain = MGT->getChain();
10239  SDValue Index = MGT->getIndex();
10240  SDValue Scale = MGT->getScale();
10241  SDValue PassThru = MGT->getPassThru();
10242  SDValue BasePtr = MGT->getBasePtr();
10243  SDLoc DL(N);
10244
10245  // Zap gathers with a zero mask.
10247    return CombineTo(N, PassThru, MGT->getChain());
10248
// refineUniformBase mutates BasePtr/Index on success, so rebuild the node.
10249  if (refineUniformBase(BasePtr, Index, DAG)) {
10250    SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10251    return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10252                               MGT->getMemoryVT(), DL, Ops,
10253                               MGT->getMemOperand(), MGT->getIndexType(),
10254                               MGT->getExtensionType());
10255  }
10256
// Likewise for a refined (extend-stripped) index.
10257  if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
10258    SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10259    return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10260                               MGT->getMemoryVT(), DL, Ops,
10261                               MGT->getMemOperand(), MGT->getIndexType(),
10262                               MGT->getExtensionType());
10263  }
10264
10265  return SDValue();
10266}
10267
// Combine for masked load nodes: replace zero-mask loads with the
// pass-through value, convert all-ones-mask loads into ordinary loads, and
// try indexed-load formation.
// NOTE(review): the MaskedLoadSDNode cast (orig. 10269), the zero-mask check
// (10274), and the indexed-load call line (10289) are missing from this
// scraped listing.
10268SDValue DAGCombiner::visitMLOAD(SDNode *N) {
10270  SDValue Mask = MLD->getMask();
10271  SDLoc DL(N);
10272
10273  // Zap masked loads with a zero mask.
10275    return CombineTo(N, MLD->getPassThru(), MLD->getChain());
10276
10277  // If this is a masked load with an all ones mask, we can use a unmasked load.
10278  // FIXME: Can we do this for indexed, expanding, or extending loads?
10279  if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
10280      !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
10281    SDValue NewLd = DAG.getLoad(
10282        N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
10283        MLD->getPointerInfo(), MLD->getOriginalAlign(),
10284        MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
// Replace both the value result and the chain result of the masked load.
10285    return CombineTo(N, NewLd, NewLd.getValue(1));
10286  }
10287
10288  // Try transforming N to an indexed load.
10290    return SDValue(N, 0);
10291
10292  return SDValue();
10293}
10294
10295/// A vector select of 2 constant vectors can be simplified to math/logic to
10296/// avoid a variable select instruction and possibly avoid constant loads.
// Handles: arms differing by +/-1 (-> ext+add), power-of-two vs zero
// (-> zext+shl), and delegates the sign-bit-smear case to a helper.
// NOTE(review): the tail of the entry guard (orig. 10303-10305, presumably
// checking both arms are constant BUILD_VECTORs and the target hook), the
// zext line 10345, and the helper-call header 10350 are missing from this
// scraped listing.
10297SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
10298  SDValue Cond = N->getOperand(0);
10299  SDValue N1 = N->getOperand(1);
10300  SDValue N2 = N->getOperand(2);
10301  EVT VT = N->getValueType(0);
10302  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
10306    return SDValue();
10307
10308  // Check if we can use the condition value to increment/decrement a single
10309  // constant value. This simplifies a select to an add and removes a constant
10310  // load/materialization from the general case.
10311  bool AllAddOne = true;
10312  bool AllSubOne = true;
10313  unsigned Elts = VT.getVectorNumElements();
10314  for (unsigned i = 0; i != Elts; ++i) {
10315    SDValue N1Elt = N1.getOperand(i);
10316    SDValue N2Elt = N2.getOperand(i);
// Undef or mismatched-type lanes are ignored; they don't disprove either
// the "+1" or the "-1" relationship.
10317    if (N1Elt.isUndef() || N2Elt.isUndef())
10318      continue;
10319    if (N1Elt.getValueType() != N2Elt.getValueType())
10320      continue;
10321
10322    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
10323    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
10324    if (C1 != C2 + 1)
10325      AllAddOne = false;
10326    if (C1 != C2 - 1)
10327      AllSubOne = false;
10328  }
10329
10330  // Further simplifications for the extra-special cases where the constants are
10331  // all 0 or all -1 should be implemented as folds of these patterns.
10332  SDLoc DL(N);
10333  if (AllAddOne || AllSubOne) {
10334    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
10335    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
10336    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
10337    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
10338    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
10339  }
10340
10341  // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
10342  APInt Pow2C;
10343  if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
10344      isNullOrNullSplat(N2)) {
10346    SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
10347    return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
10348  }
10349
10351    return V;
10352
10353  // The general case for select-of-constants:
10354  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
10355  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
10356  // leave that to a machine-specific pass.
10357  return SDValue();
10358}
10359
10360SDValue DAGCombiner::visitVSELECT(SDNode *N) {
10361 SDValue N0 = N->getOperand(0);
10362 SDValue N1 = N->getOperand(1);
10363 SDValue N2 = N->getOperand(2);
10364 EVT VT = N->getValueType(0);
10365 SDLoc DL(N);
10366
10367 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10368 return V;
10369
10370 if (SDValue V = foldBoolSelectToLogic(N, DAG))
10371 return V;
10372
10373 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
10374 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
10375 return DAG.getSelect(DL, VT, F, N2, N1);
10376
10377 // Canonicalize integer abs.
10378 // vselect (setg[te] X, 0), X, -X ->
10379 // vselect (setgt X, -1), X, -X ->
10380 // vselect (setl[te] X, 0), -X, X ->
10381 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
10382 if (N0.getOpcode() == ISD::SETCC) {
10383 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
10385 bool isAbs = false;
10386 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
10387
10388 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
10389 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
10390 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
10392 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
10393 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
10394 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
10395
10396 if (isAbs) {
10398 return DAG.getNode(ISD::ABS, DL, VT, LHS);
10399
10400 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
10401 DAG.getConstant(VT.getScalarSizeInBits() - 1,
10402 DL, getShiftAmountTy(VT)));
10403 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
10404 AddToWorklist(Shift.getNode());
10405 AddToWorklist(Add.getNode());
10406 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
10407 }
10408
10409 // vselect x, y (fcmp lt x, y) -> fminnum x, y
10410 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
10411 //
10412 // This is OK if we don't care about what happens if either operand is a
10413 // NaN.
10414 //
10415 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10416 if (SDValue FMinMax =
10417 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10418 return FMinMax;
10419 }
10420
10421 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10422 return S;
10423 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10424 return S;
10425
10426 // If this select has a condition (setcc) with narrower operands than the
10427 // select, try to widen the compare to match the select width.
10428 // TODO: This should be extended to handle any constant.
10429 // TODO: This could be extended to handle non-loading patterns, but that
10430 // requires thorough testing to avoid regressions.
10431 if (isNullOrNullSplat(RHS)) {
10432 EVT NarrowVT = LHS.getValueType();
10433 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10434 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10435 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10436 unsigned WideWidth = WideVT.getScalarSizeInBits();
10437 bool IsSigned = isSignedIntSetCC(CC);
10438 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10439 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10440 SetCCWidth != 1 && SetCCWidth < WideWidth &&
10443 // Both compare operands can be widened for free. The LHS can use an
10444 // extended load, and the RHS is a constant:
10445 // vselect (ext (setcc load(X), C)), N1, N2 -->
10446 // vselect (setcc extload(X), C'), N1, N2
10447 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10448 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10449 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10452 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10453 }
10454 }
10455
10456 // Match VSELECTs into add with unsigned saturation.
10457 if (hasOperation(ISD::UADDSAT, VT)) {
10458 // Check if one of the arms of the VSELECT is vector with all bits set.
10459 // If it's on the left side invert the predicate to simplify logic below.
10460 SDValue Other;
10461 ISD::CondCode SatCC = CC;
10462 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10463 Other = N2;
10465 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10466 Other = N1;
10467 }
10468
10469 if (Other && Other.getOpcode() == ISD::ADD) {
10471 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10472
10473 // Canonicalize condition operands.
10474 if (SatCC == ISD::SETUGE) {
10477 }
10478
10479 // We can test against either of the addition operands.
10480 // x <= x+y ? x+y : ~0 --> uaddsat x, y
10481 // x+y >= x ? x+y : ~0 --> uaddsat x, y
10482 if (SatCC == ISD::SETULE && Other == CondRHS &&
10483 (OpLHS == CondLHS || OpRHS == CondLHS))
10484 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10485
10486 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10487 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10488 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10489 CondLHS == OpLHS) {
10490 // If the RHS is a constant we have to reverse the const
10491 // canonicalization.
10492 // x >= ~C ? x+C : ~0 --> uaddsat x, C
10494 return Cond->getAPIntValue() == ~Op->getAPIntValue();
10495 };
10496 if (SatCC == ISD::SETULE &&
10498 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10499 }
10500 }
10501 }
10502
10503 // Match VSELECTs into sub with unsigned saturation.
10504 if (hasOperation(ISD::USUBSAT, VT)) {
10505 // Check if one of the arms of the VSELECT is a zero vector. If it's on
10506 // the left side invert the predicate to simplify logic below.
10507 SDValue Other;
10508 ISD::CondCode SatCC = CC;
10509 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10510 Other = N2;
10513 Other = N1;
10514 }
10515
10516 if (Other && Other.getNumOperands() == 2) {
10518 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10519
10520 if (Other.getOpcode() == ISD::SUB &&
10521 LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10522 OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10523 // Look for a general sub with unsigned saturation first.
10524 // zext(x) >= y ? x - trunc(y) : 0
10525 // --> usubsat(x,trunc(umin(y,SatLimit)))
10526 // zext(x) > y ? x - trunc(y) : 0
10527 // --> usubsat(x,trunc(umin(y,SatLimit)))
10528 if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10529 return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10530 DL);
10531 }
10532
10533 if (OpLHS == LHS) {
10534 // Look for a general sub with unsigned saturation first.
10535 // x >= y ? x-y : 0 --> usubsat x, y
10536 // x > y ? x-y : 0 --> usubsat x, y
10537 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10538 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10539 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10540
10541 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10542 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10543 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10544 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10545 // If the RHS is a constant we have to reverse the const
10546 // canonicalization.
10547 // x > C-1 ? x+-C : 0 --> usubsat x, C
10549 return (!Op && !Cond) ||
10550 (Op && Cond &&
10551 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10552 };
10553 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10555 /*AllowUndefs*/ true)) {
10556 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10557 DAG.getConstant(0, DL, VT), OpRHS);
10558 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10559 }
10560
10561 // Another special case: If C was a sign bit, the sub has been
10562 // canonicalized into a xor.
10563 // FIXME: Would it be better to use computeKnownBits to determine
10564 // whether it's safe to decanonicalize the xor?
10565 // x s< 0 ? x^C : 0 --> usubsat x, C
10567 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10570 SplatValue.isSignMask()) {
10571 // Note that we have to rebuild the RHS constant here to
10572 // ensure we don't rely on particular values of undef lanes.
10573 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10574 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10575 }
10576 }
10577 }
10578 }
10579 }
10580 }
10581 }
10582
10583 if (SimplifySelectOps(N, N1, N2))
10584 return SDValue(N, 0); // Don't revisit N.
10585
10586 // Fold (vselect all_ones, N1, N2) -> N1
10588 return N1;
10589 // Fold (vselect all_zeros, N1, N2) -> N2
10591 return N2;
10592
10593 // The ConvertSelectToConcatVector function is assuming both the above
10594 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
10595 // and addressed.
10596 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10600 return CV;
10601 }
10602
10604 return V;
10605
10606 if (hasOperation(ISD::SRA, VT))
10608 return V;
10609
10610 return SDValue();
10611}
10612
10613SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  // Operands of select_cc: N0=lhs, N1=rhs, N2=true value, N3=false value,
  // N4=condition code.
10614 SDValue N0 = N->getOperand(0);
10615 SDValue N1 = N->getOperand(1);
10616 SDValue N2 = N->getOperand(2);
10617 SDValue N3 = N->getOperand(3);
10618 SDValue N4 = N->getOperand(4);
10620
10621 // fold select_cc lhs, rhs, x, x, cc -> x
10622 if (N2 == N3)
10623 return N2;
10624
10625 // Determine if the condition we're dealing with is constant
 // NOTE(review): the extraction dropped the line that materializes SCC
 // (a SimplifySetCC call); the brace-enclosed body below belongs to that
 // missing 'if (SDValue SCC = ...)'.
10627 CC, SDLoc(N), false)) {
10628 AddToWorklist(SCC.getNode());
10629
10630 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10631 if (!SCCC->isZero())
10632 return N2; // cond always true -> true val
10633 else
10634 return N3; // cond always false -> false val
10635 } else if (SCC->isUndef()) {
10636 // When the condition is UNDEF, just return the first operand. This is
10637 // coherent the DAG creation, no setcc node is created in this case
10638 return N2;
10639 } else if (SCC.getOpcode() == ISD::SETCC) {
10640 // Fold to a simpler select_cc
 // Preserve the flags of the simplified setcc on the rebuilt node.
10641 SDValue SelectOp = DAG.getNode(
10642 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10643 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10644 SelectOp->setFlags(SCC->getFlags());
10645 return SelectOp;
10646 }
10647 }
10648
10649 // If we can fold this based on the true/false value, do so.
10650 if (SimplifySelectOps(N, N2, N3))
10651 return SDValue(N, 0); // Don't revisit N.
10652
10653 // fold select_cc into other things, such as min/max/abs
10654 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10655}
10656
10657SDValue DAGCombiner::visitSETCC(SDNode *N) {
10658 // setcc is very commonly used as an argument to brcond. This pattern
10659 // also lend itself to numerous combines and, as a result, it is desired
10660 // we keep the argument to a brcond as a setcc as much as possible.
10661 bool PreferSetCC =
10662 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10663
10664 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10665 EVT VT = N->getValueType(0);
10666
10667 // SETCC(FREEZE(X), CONST, Cond)
10668 // =>
10669 // FREEZE(SETCC(X, CONST, Cond))
10670 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10671 // isn't equivalent to true or false.
10672 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
10673 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
10674 //
10675 // This transformation is beneficial because visitBRCOND can fold
10676 // BRCOND(FREEZE(X)) to BRCOND(X).
10677
10678 // Conservatively optimize integer comparisons only.
10679 if (PreferSetCC) {
10680 // Do this only when SETCC is going to be used by BRCOND.
10681
10682 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
 // NOTE(review): the extraction dropped the lines that declare N0C/N1C
 // (constant views of the two operands used below).
10685 bool Updated = false;
10686
10687 // Is 'X Cond C' always true or false?
 // NOTE(review): the lambda header of IsAlwaysTrueOrFalse is missing from
 // this extraction; the body below classifies trivially-true/false
 // comparisons against boundary constants (0, min/max signed, all-ones).
10689 bool False = (Cond == ISD::SETULT && C->isZero()) ||
10690 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
10691 (Cond == ISD::SETUGT && C->isAllOnes()) ||
10692 (Cond == ISD::SETGT && C->isMaxSignedValue());
10693 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
10694 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
10695 (Cond == ISD::SETUGE && C->isZero()) ||
10696 (Cond == ISD::SETGE && C->isMinSignedValue());
10697 return True || False;
10698 };
10699
 // Hoist FREEZE out of either operand when the compare result cannot be a
 // constant (otherwise folding through FREEZE would be unsound for poison).
10700 if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
10701 if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
10702 N0 = N0->getOperand(0);
10703 Updated = true;
10704 }
10705 }
10706 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
10708 N0C)) {
10709 N1 = N1->getOperand(0);
10710 Updated = true;
10711 }
10712 }
10713
10714 if (Updated)
10715 return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
10716 }
10717
10718 SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10719 SDLoc(N), !PreferSetCC);
10720
10721 if (!Combined)
10722 return SDValue();
10723
10724 // If we prefer to have a setcc, and we don't, we'll try our best to
10725 // recreate one using rebuildSetCC.
10726 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
 // NOTE(review): the line creating NewSetCC (a rebuildSetCC call) is
 // missing from this extraction.
10728
10729 // We don't have anything interesting to combine to.
10730 if (NewSetCC.getNode() == N)
10731 return SDValue();
10732
10733 if (NewSetCC)
10734 return NewSetCC;
10735 }
10736
10737 return Combined;
10738}
10739
10740SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10741 SDValue LHS = N->getOperand(0);
10742 SDValue RHS = N->getOperand(1);
10743 SDValue Carry = N->getOperand(2);
10744 SDValue Cond = N->getOperand(3);
10745
10746 // If Carry is false, fold to a regular SETCC.
10747 if (isNullConstant(Carry))
10748 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10749
10750 return SDValue();
10751}
10752
10753/// Check if N satisfies:
10754///   N is used once.
10755///   N is a Load.
10756///   The load is compatible with ExtOpcode. It means
10757///     If load has explicit zero/sign extension, ExpOpcode must have the same
10758///     extension.
10759///     Otherwise returns true.
10760static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10761 if (!N.hasOneUse())
10762 return false;
10763
10764 if (!isa<LoadSDNode>(N))
10765 return false;
10766
10767 LoadSDNode *Load = cast<LoadSDNode>(N);
10768 ISD::LoadExtType LoadExt = Load->getExtensionType();
 // NOTE(review): the extraction dropped the condition line guarding this
 // early return (presumably: plain/any-ext loads are always compatible).
10770 return true;
10771
10772 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10773 // extension.
 // NOTE(review): the mismatch condition lines are missing from this
 // extraction; this returns false when the load's extension kind disagrees
 // with ExtOpcode.
10776 return false;
10777
10778 return true;
10779}
10780
10781/// Fold
10782///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10783///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10784///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10785/// This function is called by the DAGCombiner when visiting sext/zext/aext
10786/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
 // NOTE(review): the first line of the signature is missing from this
 // extraction; only the trailing parameter is visible below.
10788 SelectionDAG &DAG) {
10789 unsigned Opcode = N->getOpcode();
10790 SDValue N0 = N->getOperand(0);
10791 EVT VT = N->getValueType(0);
10792 SDLoc DL(N);
10793
10794 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10795 Opcode == ISD::ANY_EXTEND) &&
10796 "Expected EXTEND dag node in input!");
10797
 // The extend must consume a single-use (v)select whose two value operands
 // are both loads that isCompatibleLoad accepts for this extension kind.
10798 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10799 !N0.hasOneUse())
10800 return SDValue();
10801
10802 SDValue Op1 = N0->getOperand(1);
10803 SDValue Op2 = N0->getOperand(2);
10804 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10805 return SDValue();
10806
 // NOTE(review): the lines declaring ExtLoadOpcode and the Load1/Load2
 // casts are missing from this extraction; the legality checks below
 // consult the target for the chosen extending-load opcode.
10808 if (Opcode == ISD::SIGN_EXTEND)
10810 else if (Opcode == ISD::ZERO_EXTEND)
10812
10815 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10816 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10817 return SDValue();
10818
 // Push the extend into both arms; the loads are folded into ext-loads by
 // later combines.
10819 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10820 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10821 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10822}
10823
10824/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10825/// a build_vector of constants.
10826/// This function is called by the DAGCombiner when visiting sext/zext/aext
10827/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10828/// Vector extends are not folded if operations are legal; this is to
10829/// avoid introducing illegal build_vector dag nodes.
 // NOTE(review): the first line of the signature is missing from this
 // extraction; only the trailing parameters are visible below.
10831 SelectionDAG &DAG, bool LegalTypes) {
10832 unsigned Opcode = N->getOpcode();
10833 SDValue N0 = N->getOperand(0);
10834 EVT VT = N->getValueType(0);
10835 SDLoc DL(N);
10836
10837 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10838 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
10840 && "Expected EXTEND dag node in input!");
10841
10842 // fold (sext c1) -> c1
10843 // fold (zext c1) -> c1
10844 // fold (aext c1) -> c1
10845 if (isa<ConstantSDNode>(N0))
10846 return DAG.getNode(Opcode, DL, VT, N0);
10847
10848 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10849 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10850 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10851 if (N0->getOpcode() == ISD::SELECT) {
10852 SDValue Op1 = N0->getOperand(1);
10853 SDValue Op2 = N0->getOperand(2);
10854 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10855 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10856 // For any_extend, choose sign extension of the constants to allow a
10857 // possible further transform to sign_extend_inreg.i.e.
10858 //
10859 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10860 // t2: i64 = any_extend t1
10861 // -->
10862 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10863 // -->
10864 // t4: i64 = sign_extend_inreg t3
10865 unsigned FoldOpc = Opcode;
10866 if (FoldOpc == ISD::ANY_EXTEND)
 // NOTE(review): the line rewriting FoldOpc (to SIGN_EXTEND per the
 // comment above) is missing from this extraction.
10868 return DAG.getSelect(DL, VT, N0->getOperand(0),
10869 DAG.getNode(FoldOpc, DL, VT, Op1),
10870 DAG.getNode(FoldOpc, DL, VT, Op2));
10871 }
10872 }
10873
10874 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
10875 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
10876 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
10877 EVT SVT = VT.getScalarType();
10878 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10880 return SDValue();
10881
10882 // We can fold this node into a build_vector.
10883 unsigned VTBits = SVT.getSizeInBits();
10884 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
 // NOTE(review): the SmallVector<SDValue> Elts declaration line is missing
 // from this extraction.
10886 unsigned NumElts = VT.getVectorNumElements();
10887
10888 // For zero-extensions, UNDEF elements still guarantee to have the upper
10889 // bits set to zero.
10890 bool IsZext =
10891 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10892
10893 for (unsigned i = 0; i != NumElts; ++i) {
10894 SDValue Op = N0.getOperand(i);
10895 if (Op.isUndef()) {
10896 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10897 continue;
10898 }
10899
10900 SDLoc DL(Op);
10901 // Get the constant value and if needed trunc it to the size of the type.
10902 // Nodes like build_vector might have constants wider than the scalar type.
10903 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10904 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10905 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10906 else
10907 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10908 }
10909
10910 return DAG.getBuildVector(VT, DL, Elts);
10911}
10912
10913// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
10914// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
10915// transformation. Returns true if extension are possible and the above
10916// mentioned transformation is profitable.
 // NOTE(review): the first line of the signature is missing from this
 // extraction; only the trailing parameters are visible below.
10918 unsigned ExtOpc,
10920 const TargetLowering &TLI) {
10921 bool HasCopyToRegUses = false;
10922 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
 // Walk every user of the load result (skipping N itself and uses of other
 // result values) to decide whether they can all absorb the extension.
10923 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10924 UE = N0.getNode()->use_end();
10925 UI != UE; ++UI) {
10926 SDNode *User = *UI;
10927 if (User == N)
10928 continue;
10929 if (UI.getUse().getResNo() != N0.getResNo())
10930 continue;
10931 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10932 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
 // NOTE(review): the lines extracting the setcc condition code and the
 // signedness check are missing from this extraction.
10935 // Sign bits will be lost after a zext.
10936 return false;
10937 bool Add = false;
10938 for (unsigned i = 0; i != 2; ++i) {
10939 SDValue UseOp = User->getOperand(i);
10940 if (UseOp == N0)
10941 continue;
10942 if (!isa<ConstantSDNode>(UseOp))
10943 return false;
10944 Add = true;
10945 }
10946 if (Add)
10947 ExtendNodes.push_back(User);
10948 continue;
10949 }
10950 // If truncates aren't free and there are users we can't
10951 // extend, it isn't worthwhile.
10952 if (!isTruncFree)
10953 return false;
10954 // Remember if this value is live-out.
10955 if (User->getOpcode() == ISD::CopyToReg)
10956 HasCopyToRegUses = true;
10957 }
10958
10959 if (HasCopyToRegUses) {
10960 bool BothLiveOut = false;
10961 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10962 UI != UE; ++UI) {
10963 SDUse &Use = UI.getUse();
10964 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10965 BothLiveOut = true;
10966 break;
10967 }
10968 }
10969 if (BothLiveOut)
10970 // Both unextended and extended values are live out. There had better be
10971 // a good reason for the transformation.
 // Only profitable if at least one setcc use was collected for extension.
10972 return ExtendNodes.size();
10973 }
10974 return true;
10975}
10976
10977void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
 // NOTE(review): the remaining parameter lines of the signature (OrigLoad,
 // ExtLoad) are missing from this extraction.
10979 ISD::NodeType ExtType) {
10980 // Extend SetCC uses if necessary.
 // Rebuild each collected setcc so its operands are in the extended type:
 // the load operand is replaced by ExtLoad, every other operand is widened
 // with ExtType.
10981 SDLoc DL(ExtLoad);
10982 for (SDNode *SetCC : SetCCs) {
 // NOTE(review): the SmallVector<SDValue> Ops declaration line is missing
 // from this extraction.
10984
10985 for (unsigned j = 0; j != 2; ++j) {
10986 SDValue SOp = SetCC->getOperand(j);
10987 if (SOp == OrigLoad)
10988 Ops.push_back(ExtLoad);
10989 else
10990 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10991 }
10992
10993 Ops.push_back(SetCC->getOperand(2));
10994 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10995 }
10996}
10997
10998// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10999SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
11000 SDValue N0 = N->getOperand(0);
11001 EVT DstVT = N->getValueType(0);
11002 EVT SrcVT = N0.getValueType();
11003
11004 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11005 N->getOpcode() == ISD::ZERO_EXTEND) &&
11006 "Unexpected node type (not an extend)!");
11007
11008 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
11009 // For example, on a target with legal v4i32, but illegal v8i32, turn:
11010 // (v8i32 (sext (v8i16 (load x))))
11011 // into:
11012 // (v8i32 (concat_vectors (v4i32 (sextload x)),
11013 // (v4i32 (sextload (x + 16)))))
11014 // Where uses of the original load, i.e.:
11015 // (v8i16 (load x))
11016 // are replaced with:
11017 // (v8i16 (truncate
11018 // (v8i32 (concat_vectors (v4i32 (sextload x)),
11019 // (v4i32 (sextload (x + 16)))))))
11020 //
11021 // This combine is only applicable to illegal, but splittable, vectors.
11022 // All legal types, and illegal non-vector types, are handled elsewhere.
11023 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
11024 //
11025 if (N0->getOpcode() != ISD::LOAD)
11026 return SDValue();
11027
 // NOTE(review): the LoadSDNode *LN0 cast line is missing from this
 // extraction.
11029
 // NOTE(review): parts of this guard (extension-type / desirability checks)
 // are missing from this extraction.
11031 !N0.hasOneUse() || !LN0->isSimple() ||
11032 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
11034 return SDValue();
11035
 // NOTE(review): the SmallVector<SDNode *> SetCCs declaration line is
 // missing from this extraction.
11037 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
11038 return SDValue();
11039
11040 ISD::LoadExtType ExtType =
11041 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11042
11043 // Try to split the vector types to get down to legal types.
 // NOTE(review): the lines initializing SplitSrcVT/SplitDstVT and the
 // halving statements inside the loop are missing from this extraction.
11046 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
11047 SplitSrcVT.getVectorNumElements() > 1) {
11050 }
11051
11052 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
11053 return SDValue();
11054
11055 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
11056
11057 SDLoc DL(N);
11058 const unsigned NumSplits =
11059 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
11060 const unsigned Stride = SplitSrcVT.getStoreSize();
 // NOTE(review): the Loads/Chains SmallVector declarations are missing
 // from this extraction.
11063
 // Emit one extending load per split, advancing the pointer by Stride
 // bytes each iteration and collecting values and chains.
11064 SDValue BasePtr = LN0->getBasePtr();
11065 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
11066 const unsigned Offset = Idx * Stride;
11067 const Align Align = commonAlignment(LN0->getAlign(), Offset);
11068
11070 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
11071 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
11072 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11073
11074 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
11075
11076 Loads.push_back(SplitLoad.getValue(0));
11077 Chains.push_back(SplitLoad.getValue(1));
11078 }
11079
 // NOTE(review): the line building NewChain (a TokenFactor of Chains) is
 // missing from this extraction.
11081 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
11082
11083 // Simplify TF.
11084 AddToWorklist(NewChain.getNode());
11085
11086 CombineTo(N, NewValue);
11087
11088 // Replace uses of the original load (before extension)
11089 // with a truncate of the concatenated sextloaded vectors.
11090 SDValue Trunc =
11091 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
11092 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
11093 CombineTo(N0.getNode(), Trunc, NewChain);
11094 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11095}
11096
11097// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11098// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11099SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
11100 assert(N->getOpcode() == ISD::ZERO_EXTEND);
11101 EVT VT = N->getValueType(0);
11102 EVT OrigVT = N->getOperand(0).getValueType();
11103 if (TLI.isZExtFree(OrigVT, VT))
11104 return SDValue();
11105
11106 // and/or/xor
11107 SDValue N0 = N->getOperand(0);
11108 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11109 N0.getOpcode() == ISD::XOR) ||
11110 N0.getOperand(1).getOpcode() != ISD::Constant ||
11111 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
11112 return SDValue();
11113
11114 // shl/shr
11115 SDValue N1 = N0->getOperand(0);
11116 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
11117 N1.getOperand(1).getOpcode() != ISD::Constant ||
11118 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
11119 return SDValue();
11120
11121 // load
11122 if (!isa<LoadSDNode>(N1.getOperand(0)))
11123 return SDValue();
11124 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
11125 EVT MemVT = Load->getMemoryVT();
11126 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
11127 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
11128 return SDValue();
11129
11130
11131 // If the shift op is SHL, the logic op must be AND, otherwise the result
11132 // will be wrong.
11133 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
11134 return SDValue();
11135
 // Both intermediate nodes must die for the transform to be a win.
11136 if (!N0.hasOneUse() || !N1.hasOneUse())
11137 return SDValue();
11138
 // NOTE(review): the SmallVector<SDNode *> SetCCs declaration line is
 // missing from this extraction.
11140 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
11141 ISD::ZERO_EXTEND, SetCCs, TLI))
11142 return SDValue();
11143
11144 // Actually do the transformation.
11145 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
11146 Load->getChain(), Load->getBasePtr(),
11147 Load->getMemoryVT(), Load->getMemOperand());
11148
11149 SDLoc DL1(N1);
11150 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
11151 N1.getOperand(1));
11152
 // NOTE(review): the line computing Mask (the zero-extended logic-op
 // constant) is missing from this extraction.
11154 SDLoc DL0(N0);
11155 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
11156 DAG.getConstant(Mask, DL0, VT));
11157
 // NOTE(review): a line (likely ExtendSetCCUses) is missing from this
 // extraction before the CombineTo below.
11159 CombineTo(N, And);
11160 if (SDValue(Load, 0).hasOneUse()) {
11161 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
11162 } else {
 // The narrow load still has other users: keep them fed via a truncate
 // of the new extending load.
11163 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
11164 Load->getValueType(0), ExtLoad);
11165 CombineTo(Load, Trunc, ExtLoad.getValue(1));
11166 }
11167
11168 // N0 is dead at this point.
11169 recursivelyDeleteUnusedNodes(N0.getNode());
11170
11171 return SDValue(N,0); // Return N so it doesn't get rechecked!
11172}
11173
11174/// If we're narrowing or widening the result of a vector select and the final
11175/// size is the same size as a setcc (compare) feeding the select, then try to
11176/// apply the cast operation to the select's operands because matching vector
11177/// sizes for a select condition and other operands should be more efficient.
11178SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
11179 unsigned CastOpcode = Cast->getOpcode();
 // NOTE(review): the assert listing the accepted cast opcodes is partially
 // missing from this extraction.
11183 "Unexpected opcode for vector select narrowing/widening");
11184
11185 // We only do this transform before legal ops because the pattern may be
11186 // obfuscated by target-specific operations after legalization. Do not create
11187 // an illegal select op, however, because that may be difficult to lower.
11188 EVT VT = Cast->getValueType(0);
11189 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
11190 return SDValue();
11191
11192 SDValue VSel = Cast->getOperand(0);
11193 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
11194 VSel.getOperand(0).getOpcode() != ISD::SETCC)
11195 return SDValue();
11196
11197 // Does the setcc have the same vector size as the casted select?
11198 SDValue SetCC = VSel.getOperand(0);
11199 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
11200 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
11201 return SDValue();
11202
11203 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
11204 SDValue A = VSel.getOperand(1);
11205 SDValue B = VSel.getOperand(2);
 // NOTE(review): the SDValue CastA, CastB declaration line is missing from
 // this extraction.
11207 SDLoc DL(Cast);
11208 if (CastOpcode == ISD::FP_ROUND) {
11209 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
11210 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
11211 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
11212 } else {
11213 CastA = DAG.getNode(CastOpcode, DL, VT, A);
11214 CastB = DAG.getNode(CastOpcode, DL, VT, B);
11215 }
11216 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
11217}
11218
11219// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11220// fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
 // NOTE(review): the first line of the signature is missing from this
 // extraction; only the trailing parameters are visible below.
11222 const TargetLowering &TLI, EVT VT,
11223 bool LegalOperations, SDNode *N,
11225 SDNode *N0Node = N0.getNode();
 // NOTE(review): the lines computing isAExtLoad / isZExtLoad predicates are
 // missing from this extraction.
11228 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
11230 return SDValue();
11231
 // NOTE(review): the LoadSDNode *LN0 cast line is missing from this
 // extraction.
11233 EVT MemVT = LN0->getMemoryVT();
11234 if ((LegalOperations || !LN0->isSimple() ||
11235 VT.isVector()) &&
11236 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
11237 return SDValue();
11238
 // Widen the existing ext-load in place and thread its chain through.
11240 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11241 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
11242 Combiner.CombineTo(N, ExtLoad);
11243 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11244 if (LN0->use_empty())
11245 Combiner.recursivelyDeleteUnusedNodes(LN0);
11246 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11247}
11248
11249// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11250// Only generate vector extloads when 1) they're legal, and 2) they are
11251// deemed desirable by the target.
 // NOTE(review): the first line of the signature is missing from this
 // extraction; only the trailing parameters are visible below.
11253 const TargetLowering &TLI, EVT VT,
11254 bool LegalOperations, SDNode *N, SDValue N0,
 // NOTE(review): the ExtLoadType/ExtOpc parameter lines are missing from
 // this extraction.
11257 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
 // NOTE(review): the isUNINDEXEDLoad check line is missing from this
 // extraction.
11259 ((LegalOperations || VT.isVector() ||
11260 !cast<LoadSDNode>(N0)->isSimple()) &&
11261 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
11262 return {};
11263
11264 bool DoXform = true;
 // NOTE(review): the SmallVector<SDNode *> SetCCs declaration line is
 // missing from this extraction.
11266 if (!N0.hasOneUse())
11267 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
11268 if (VT.isVector())
 // NOTE(review): the isVectorLoadExtDesirable query line is missing from
 // this extraction.
11270 if (!DoXform)
11271 return {};
11272
 // NOTE(review): the LoadSDNode *LN0 cast line is missing from this
 // extraction.
11274 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11275 LN0->getBasePtr(), N0.getValueType(),
11276 LN0->getMemOperand());
11277 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
11278 // If the load value is used only by N, replace it via CombineTo N.
11279 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
11280 Combiner.CombineTo(N, ExtLoad);
11281 if (NoReplaceTrunc) {
11282 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11283 Combiner.recursivelyDeleteUnusedNodes(LN0);
11284 } else {
 // Keep the load's other users alive via a truncate of the new ext-load.
11285 SDValue Trunc =
11287 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11288 }
11289 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11290}
11291
11293 const TargetLowering &TLI, EVT VT,
11294 SDNode *N, SDValue N0,
11297 if (!N0.hasOneUse())
11298 return SDValue();
11299
11301 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
11302 return SDValue();
11303
11304 if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
11305 return SDValue();
11306
11307 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11308 return SDValue();
11309
11310 SDLoc dl(Ld);
11311 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
11313 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
11314 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
11315 ExtLoadType, Ld->isExpandingLoad());
11316 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
11317 return NewLoad;
11318}
11319
 // Fold an extend of a sign-bit test into a shift of the inverted value.
 // NOTE(review): the first line of the signature is missing from this
 // extraction; only the trailing parameter is visible below.
11321 bool LegalOperations) {
11322 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11323 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
11324
11325 SDValue SetCC = N->getOperand(0);
11326 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
11327 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
11328 return SDValue();
11329
11330 SDValue X = SetCC.getOperand(0);
11331 SDValue Ones = SetCC.getOperand(1);
11332 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
11333 EVT VT = N->getValueType(0);
11334 EVT XVT = X.getValueType();
11335 // setge X, C is canonicalized to setgt, so we do not need to match that
11336 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
11337 // not require the 'not' op.
11338 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
11339 // Invert and smear/shift the sign bit:
11340 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
11341 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
11342 SDLoc DL(N);
11343 unsigned ShCt = VT.getSizeInBits() - 1;
11344 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 // Respect targets that consider a shift by this amount too expensive.
11345 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
11346 SDValue NotX = DAG.getNOT(DL, X, VT);
11347 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
11348 auto ShiftOpcode =
11349 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
11350 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
11351 }
11352 }
11353 return SDValue();
11354}
11355
11356SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
11357 SDValue N0 = N->getOperand(0);
11358 if (N0.getOpcode() != ISD::SETCC)
11359 return SDValue();
11360
11361 SDValue N00 = N0.getOperand(0);
11362 SDValue N01 = N0.getOperand(1);
11364 EVT VT = N->getValueType(0);
11365 EVT N00VT = N00.getValueType();
11366 SDLoc DL(N);
11367
11368 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
11369 // the same size as the compared operands. Try to optimize sext(setcc())
11370 // if this is the case.
11371 if (VT.isVector() && !LegalOperations &&
11375
11376 // If we already have the desired type, don't change it.
11377 if (SVT != N0.getValueType()) {
11378 // We know that the # elements of the results is the same as the
11379 // # elements of the compare (and the # elements of the compare result
11380 // for that matter). Check to see that they are the same size. If so,
11381 // we know that the element size of the sext'd result matches the
11382 // element size of the compare operands.
11383 if (VT.getSizeInBits() == SVT.getSizeInBits())
11384 return DAG.getSetCC(DL, VT, N00, N01, CC);
11385
11386 // If the desired elements are smaller or larger than the source
11387 // elements, we can use a matching integer vector type and then
11388 // truncate/sign extend.
11389 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
11390 if (SVT == MatchingVecType) {
11392 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
11393 }
11394 }
11395
11396 // Try to eliminate the sext of a setcc by zexting the compare operands.
11397 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
11402
11403 // We have an unsupported narrow vector compare op that would be legal
11404 // if extended to the destination type. See if the compare operands
11405 // can be freely extended to the destination type.
11406 auto IsFreeToExtend = [&](SDValue V) {
11407 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
11408 return true;
11409 // Match a simple, non-extended load that can be converted to a
11410 // legal {z/s}ext-load.
11411 // TODO: Allow widening of an existing {z/s}ext-load?
11412 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
11413 ISD::isUNINDEXEDLoad(V.getNode()) &&
11414 cast<LoadSDNode>(V)->isSimple() &&
11415 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
11416 return false;
11417
11418 // Non-chain users of this value must either be the setcc in this
11419 // sequence or extends that can be folded into the new {z/s}ext-load.
11420 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
11421 UI != UE; ++UI) {
11422 // Skip uses of the chain and the setcc.
11423 SDNode *User = *UI;
11424 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11425 continue;
11426 // Extra users must have exactly the same cast we are about to create.
11427 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11428 // is enhanced similarly.
11429 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11430 return false;
11431 }
11432 return true;
11433 };
11434
11436 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11437 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11438 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11439 }
11440 }
11441 }
11442
11443 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11444 // Here, T can be 1 or -1, depending on the type of the setcc and
11445 // getBooleanContents().
11446 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11447
11448 // To determine the "true" side of the select, we need to know the high bit
11449 // of the value returned by the setcc if it evaluates to true.
11450 // If the type of the setcc is i1, then the true case of the select is just
11451 // sext(i1 1), that is, -1.
11452 // If the type of the setcc is larger (say, i8) then the value of the high
11453 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11454 // of the appropriate width.
11456 ? DAG.getAllOnesConstant(DL, VT)
11457 : DAG.getBoolConstant(true, DL, VT, N00VT);
11458 SDValue Zero = DAG.getConstant(0, DL, VT);
11459 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11460 return SCC;
11461
11462 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11464 // Don't do this transform for i1 because there's a select transform
11465 // that would reverse it.
11466 // TODO: We should not do this transform at all without a target hook
11467 // because a sext is likely cheaper than a select?
11468 if (SetCCVT.getScalarSizeInBits() != 1 &&
11469 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11470 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11471 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11472 }
11473 }
11474
11475 return SDValue();
11476}
11477
// Combine step for ISD::SIGN_EXTEND nodes. Tries, in order: folding a sext of
// a constant, collapsing sext-of-sext/aext, eliminating trunc/sext pairs,
// forming sign-extending loads, pushing the sext through bitwise ops on loads,
// setcc-based folds, and a few algebraic rewrites. Returns an empty SDValue
// when no combine applies.
// NOTE(review): this excerpt is a doc-site extraction with several original
// source lines missing (e.g. the reduceLoadWidth call that defines NarrowLoad,
// and parts of some multi-line conditions); comments below describe only the
// code that is visible here.
11478SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11479 SDValue N0 = N->getOperand(0);
11480 EVT VT = N->getValueType(0);
11481 SDLoc DL(N);
11482
  // sext of a constant/build_vector folds to a new constant outright.
11483 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11484 return Res;
11485
11486 // fold (sext (sext x)) -> (sext x)
11487 // fold (sext (aext x)) -> (sext x)
11488 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11489 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11490
11491 if (N0.getOpcode() == ISD::TRUNCATE) {
11492 // fold (sext (truncate (load x))) -> (sext (smaller load x))
11493 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
  // NOTE(review): the line defining NarrowLoad (orig 11494, presumably a
  // reduceLoadWidth call) is missing from this excerpt.
11495 SDNode *oye = N0.getOperand(0).getNode();
11496 if (NarrowLoad.getNode() != N0.getNode()) {
11497 CombineTo(N0.getNode(), NarrowLoad);
11498 // CombineTo deleted the truncate, if needed, but not what's under it.
11499 AddToWorklist(oye);
11500 }
11501 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11502 }
11503
11504 // See if the value being truncated is already sign extended. If so, just
11505 // eliminate the trunc/sext pair.
11506 SDValue Op = N0.getOperand(0);
11507 unsigned OpBits = Op.getScalarValueSizeInBits();
11508 unsigned MidBits = N0.getScalarValueSizeInBits();
11509 unsigned DestBits = VT.getScalarSizeInBits();
11510 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11511
11512 if (OpBits == DestBits) {
11513 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
11514 // bits, it is already ready.
11515 if (NumSignBits > DestBits-MidBits)
11516 return Op;
11517 } else if (OpBits < DestBits) {
11518 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
11519 // bits, just sext from i32.
11520 if (NumSignBits > OpBits-MidBits)
11521 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11522 } else {
11523 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
11524 // bits, just truncate to i32.
11525 if (NumSignBits > OpBits-MidBits)
11526 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11527 }
11528
11529 // fold (sext (truncate x)) -> (sextinreg x).
11530 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11531 N0.getValueType())) {
  // First bring Op to the destination width, then sign-extend in register
  // from the truncated (middle) type.
11532 if (OpBits < DestBits)
11533 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11534 else if (OpBits > DestBits)
11535 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11536 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11537 DAG.getValueType(N0.getValueType()));
11538 }
11539 }
11540
11541 // Try to simplify (sext (load x)).
  // NOTE(review): the trailing argument lines of these calls (orig 11544 and
  // 11549, likely the ISD::SEXTLOAD/SIGN_EXTEND opcode arguments) are missing
  // from this excerpt.
11542 if (SDValue foldedExt =
11543 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11545 return foldedExt;
11546
11547 if (SDValue foldedExt =
11548 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11550 return foldedExt;
11551
11552 // fold (sext (load x)) to multiple smaller sextloads.
11553 // Only on illegal but splittable vectors.
  // NOTE(review): the condition defining ExtLoad (orig 11554) is missing here.
11555 return ExtLoad;
11556
11557 // Try to simplify (sext (sextload x)).
11559 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11560 return foldedExt;
11561
11562 // fold (sext (and/or/xor (load x), cst)) ->
11563 // (and/or/xor (sextload x), (sext cst))
11564 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11565 N0.getOpcode() == ISD::XOR) &&
11566 isa<LoadSDNode>(N0.getOperand(0)) &&
11567 N0.getOperand(1).getOpcode() == ISD::Constant &&
11568 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
  // NOTE(review): the line defining LN00 (orig 11569) is missing from this
  // excerpt; the following code uses it as the LoadSDNode operand.
11570 EVT MemVT = LN00->getMemoryVT();
  // Only fold when the resulting sextload is legal and we would not be
  // contradicting an existing zero-extending load.
11571 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11572 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11574 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11575 ISD::SIGN_EXTEND, SetCCs, TLI);
11576 if (DoXform) {
11578 LN00->getChain(), LN00->getBasePtr(),
11579 LN00->getMemoryVT(),
11580 LN00->getMemOperand());
11582 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11583 ExtLoad, DAG.getConstant(Mask, DL, VT));
11585 bool NoReplaceTruncAnd = !N0.hasOneUse();
11586 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11587 CombineTo(N, And);
11588 // If N0 has multiple uses, change other uses as well.
11589 if (NoReplaceTruncAnd) {
11592 CombineTo(N0.getNode(), TruncAnd);
11593 }
  // Replace the load's chain result; if the load value itself has other
  // users, give them a truncate of the new extending load instead.
11594 if (NoReplaceTrunc) {
11595 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11596 } else {
11597 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11598 LN00->getValueType(0), ExtLoad);
11599 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11600 }
11601 return SDValue(N,0); // Return N so it doesn't get rechecked!
11602 }
11603 }
11604 }
11605
11606 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11607 return V;
11608
11609 if (SDValue V = foldSextSetcc(N))
11610 return V;
11611
11612 // fold (sext x) -> (zext x) if the sign bit is known zero.
11613 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11614 DAG.SignBitIsZero(N0))
11615 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11616
  // NOTE(review): the condition defining NewVSel (orig 11617) is missing here.
11618 return NewVSel;
11619
11620 // Eliminate this sign extend by doing a negation in the destination type:
11621 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11622 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11626 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11627 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11628 }
11629 // Eliminate this sign extend by doing a decrement in the destination type:
11630 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11631 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11635 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11636 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11637 }
11638
11639 // fold sext (not i1 X) -> add (zext i1 X), -1
11640 // TODO: This could be extended to handle bool vectors.
11641 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11642 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11643 TLI.isOperationLegal(ISD::ADD, VT)))) {
11644 // If we can eliminate the 'not', the sext form should be better
11645 if (SDValue NewXor = visitXOR(N0.getNode())) {
11646 // Returning N0 is a form of in-visit replacement that may have
11647 // invalidated N0.
11648 if (NewXor.getNode() == N0.getNode()) {
11649 // Return SDValue here as the xor should have already been replaced in
11650 // this sext.
11651 return SDValue();
11652 } else {
11653 // Return a new sext with the new xor.
11654 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11655 }
11656 }
11657
11658 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11659 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11660 }
11661
11662 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11663 return Res;
11664
11665 return SDValue();
11666}
11667
11668// isTruncateOf - If N is a truncate of some other value, return true, record
11669// the value being truncated in Op and which of Op's bits are zero/one in Known.
11670// This function computes KnownBits to avoid a duplicated call to
11671// computeKnownBits in the caller.
// NOTE(review): the function's signature line (orig 11672) is missing from
// this excerpt; the visible body operates on an SDValue N and fills the
// out-parameters Op and Known.
11673 KnownBits &Known) {
  // Direct truncate: record the wide source value and its known bits.
11674 if (N->getOpcode() == ISD::TRUNCATE) {
11675 Op = N->getOperand(0);
11676 Known = DAG.computeKnownBits(Op);
11677 return true;
11678 }
11679
  // Otherwise, match an i1-result (setcc X, 0, setne): when every bit of X
  // above bit 0 is known zero, that compare is equivalent to truncating X to
  // one bit.
11680 if (N.getOpcode() != ISD::SETCC ||
11681 N.getValueType().getScalarType() != MVT::i1 ||
11682 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11683 return false;
11684
11685 SDValue Op0 = N->getOperand(0);
11686 SDValue Op1 = N->getOperand(1);
11687 assert(Op0.getValueType() == Op1.getValueType());
11688
  // One side of the compare must be zero (or a zero splat); the other side
  // is the value being "truncated".
11689 if (isNullOrNullSplat(Op0))
11690 Op = Op1;
11691 else if (isNullOrNullSplat(Op1))
11692 Op = Op0;
11693 else
11694 return false;
11695
11696 Known = DAG.computeKnownBits(Op);
11697
  // Succeed only if all bits of Op except possibly bit 0 are known zero.
11698 return (Known.Zero | 1).isAllOnes();
11699}
11700
11701/// Given an extending node with a pop-count operand, if the target does not
11702/// support a pop-count in the narrow source type but does support it in the
11703/// destination type, widen the pop-count to the destination type.
// NOTE(review): the function's signature line (orig 11704) is missing from
// this excerpt; the visible body operates on an SDNode *Extend and a
// SelectionDAG &DAG.
11705 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11706 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11707
  // Only transform a single-use CTPOP feeding this extend; otherwise the
  // narrow CTPOP would still be needed for its other users.
11708 SDValue CtPop = Extend->getOperand(0);
11709 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11710 return SDValue();
11711
  // Bail out if the narrow CTPOP is already supported. The second half of the
  // profitability condition (orig line 11715, presumably checking support for
  // CTPOP in VT) is missing from this excerpt.
11712 EVT VT = Extend->getValueType(0);
11713 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11714 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11716 return SDValue();
11717
11718 // zext (ctpop X) --> ctpop (zext X)
11719 SDLoc DL(Extend);
11720 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11721 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11722}
11723
// Combine step for ISD::ZERO_EXTEND nodes. Tries, in order: folding a zext of
// a constant, collapsing zext-of-zext/aext, eliminating zext-of-truncate when
// the truncated bits are known zero (or turning it into an AND mask), forming
// zero-extending loads, pushing the zext through bitwise ops on loads, setcc
// folds, shift folds, and a pop-count widening. Returns an empty SDValue when
// no combine applies.
// NOTE(review): this excerpt is a doc-site extraction with several original
// source lines missing (declaration lines of some locals and parts of some
// multi-line conditions); comments below describe only the visible code.
11724SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11725 SDValue N0 = N->getOperand(0);
11726 EVT VT = N->getValueType(0);
11727
  // zext of a constant/build_vector folds to a new constant outright.
11728 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11729 return Res;
11730
11731 // fold (zext (zext x)) -> (zext x)
11732 // fold (zext (aext x)) -> (zext x)
11733 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11734 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11735 N0.getOperand(0));
11736
11737 // fold (zext (truncate x)) -> (zext x) or
11738 // (zext (truncate x)) -> (truncate x)
11739 // This is valid when the truncated bits of x are already zero.
11740 SDValue Op;
11741 KnownBits Known;
11742 if (isTruncateOf(DAG, N0, Op, Known)) {
  // NOTE(review): the line declaring TruncatedBits (orig 11743) is missing
  // from this excerpt; the visible ternary computes the mask of bits that the
  // truncate discards.
11744 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11745 APInt(Op.getScalarValueSizeInBits(), 0) :
11746 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11747 N0.getScalarValueSizeInBits(),
11748 std::min(Op.getScalarValueSizeInBits(),
11749 VT.getScalarSizeInBits()));
  // If everything the truncate drops is already known zero, the trunc/zext
  // pair is a no-op and we can extend or truncate Op directly.
11750 if (TruncatedBits.isSubsetOf(Known.Zero))
11751 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11752 }
11753
11754 // fold (zext (truncate x)) -> (and x, mask)
11755 if (N0.getOpcode() == ISD::TRUNCATE) {
11756 // fold (zext (truncate (load x))) -> (zext (smaller load x))
11757 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
  // NOTE(review): the line defining NarrowLoad (orig 11758, presumably a
  // reduceLoadWidth call) is missing from this excerpt.
11759 SDNode *oye = N0.getOperand(0).getNode();
11760 if (NarrowLoad.getNode() != N0.getNode()) {
11761 CombineTo(N0.getNode(), NarrowLoad);
11762 // CombineTo deleted the truncate, if needed, but not what's under it.
11763 AddToWorklist(oye);
11764 }
11765 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11766 }
11767
11768 EVT SrcVT = N0.getOperand(0).getValueType();
11769 EVT MinVT = N0.getValueType();
11770
11771 // Try to mask before the extension to avoid having to generate a larger mask,
11772 // possibly over several sub-vectors.
11773 if (SrcVT.bitsLT(VT) && VT.isVector()) {
11774 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
  // NOTE(review): the second half of this condition (orig 11775) is missing
  // from this excerpt.
11776 SDValue Op = N0.getOperand(0);
11777 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11778 AddToWorklist(Op.getNode());
11779 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11780 // Transfer the debug info; the new node is equivalent to N0.
11782 return ZExtOrTrunc;
11783 }
11784 }
11785
11786 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11787 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11788 AddToWorklist(Op.getNode());
  // NOTE(review): the line defining And (orig 11789, presumably a
  // getZeroExtendInReg of Op) is missing from this excerpt.
11790 // We may safely transfer the debug info describing the truncate node over
11791 // to the equivalent and operation.
11792 DAG.transferDbgValues(N0, And);
11793 return And;
11794 }
11795 }
11796
11797 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11798 // if either of the casts is not free.
11799 if (N0.getOpcode() == ISD::AND &&
11800 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11801 N0.getOperand(1).getOpcode() == ISD::Constant &&
11803 N0.getValueType()) ||
11804 !TLI.isZExtFree(N0.getValueType(), VT))) {
11805 SDValue X = N0.getOperand(0).getOperand(0);
11806 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
  // NOTE(review): the line defining Mask (orig 11807, presumably the AND
  // constant zero-extended to VT's width) is missing from this excerpt.
11808 SDLoc DL(N);
11809 return DAG.getNode(ISD::AND, DL, VT,
11810 X, DAG.getConstant(Mask, DL, VT));
11811 }
11812
11813 // Try to simplify (zext (load x)).
11814 if (SDValue foldedExt =
11815 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11817 return foldedExt;
11818
11819 if (SDValue foldedExt =
11820 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11822 return foldedExt;
11823
11824 // fold (zext (load x)) to multiple smaller zextloads.
11825 // Only on illegal but splittable vectors.
  // NOTE(review): the condition defining ExtLoad (orig 11826) is missing here.
11827 return ExtLoad;
11828
11829 // fold (zext (and/or/xor (load x), cst)) ->
11830 // (and/or/xor (zextload x), (zext cst))
11831 // Unless (and (load x) cst) will match as a zextload already and has
11832 // additional users.
11833 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11834 N0.getOpcode() == ISD::XOR) &&
11835 isa<LoadSDNode>(N0.getOperand(0)) &&
11836 N0.getOperand(1).getOpcode() == ISD::Constant &&
11837 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
  // NOTE(review): the line defining LN00 (orig 11838) is missing from this
  // excerpt; the following code uses it as the LoadSDNode operand.
11839 EVT MemVT = LN00->getMemoryVT();
  // Only fold when the resulting zextload is legal and we would not be
  // contradicting an existing sign-extending load.
11840 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11841 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11842 bool DoXform = true;
  // If the AND already matches as a zextload and has extra users, keep it.
11844 if (!N0.hasOneUse()) {
11845 if (N0.getOpcode() == ISD::AND) {
11846 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11847 EVT LoadResultTy = AndC->getValueType(0);
11848 EVT ExtVT;
11850 DoXform = false;
11851 }
11852 }
11853 if (DoXform)
  // NOTE(review): the first line of this ExtendUsesToFormExtLoad call
  // (orig 11854) is missing from this excerpt.
11855 ISD::ZERO_EXTEND, SetCCs, TLI);
11856 if (DoXform) {
11858 LN00->getChain(), LN00->getBasePtr(),
11859 LN00->getMemoryVT(),
11860 LN00->getMemOperand());
11862 SDLoc DL(N);
11863 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11864 ExtLoad, DAG.getConstant(Mask, DL, VT));
11866 bool NoReplaceTruncAnd = !N0.hasOneUse();
11867 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11868 CombineTo(N, And);
11869 // If N0 has multiple uses, change other uses as well.
11870 if (NoReplaceTruncAnd) {
11873 CombineTo(N0.getNode(), TruncAnd);
11874 }
  // Replace the load's chain result; if the load value itself has other
  // users, give them a truncate of the new extending load instead.
11875 if (NoReplaceTrunc) {
11876 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11877 } else {
11878 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11879 LN00->getValueType(0), ExtLoad);
11880 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11881 }
11882 return SDValue(N,0); // Return N so it doesn't get rechecked!
11883 }
11884 }
11885 }
11886
11887 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11888 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  // NOTE(review): the condition defining ZExtLoad (orig 11889) is missing here.
11890 return ZExtLoad;
11891
11892 // Try to simplify (zext (zextload x)).
  // NOTE(review): the first line of this call (orig 11893, presumably
  // tryToFoldExtOfExtload) is missing from this excerpt.
11894 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11895 return foldedExt;
11896
11897 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11898 return V;
11899
11900 if (N0.getOpcode() == ISD::SETCC) {
11901 // Only do this before legalize for now.
11902 if (!LegalOperations && VT.isVector() &&
  // NOTE(review): part of this condition (orig 11903) is missing here.
11904 EVT N00VT = N0.getOperand(0).getValueType();
  // NOTE(review): a guard returning early (orig 11905) is missing here.
11906 return SDValue();
11907
11908 // We know that the # elements of the results is the same as the #
11909 // elements of the compare (and the # elements of the compare result for
11910 // that matter). Check to see that they are the same size. If so, we know
11911 // that the element size of the sext'd result matches the element size of
11912 // the compare operands.
11913 SDLoc DL(N);
11914 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11915 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11916 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11917 N0.getOperand(1), N0.getOperand(2));
11918 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11919 }
11920
11921 // If the desired elements are smaller or larger than the source
11922 // elements we can use a matching integer vector type and then
11923 // truncate/any extend followed by zext_in_reg.
11924 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11925 SDValue VsetCC =
  // NOTE(review): the first line of this getSetCC/getNode call (orig 11926)
  // is missing from this excerpt.
11927 N0.getOperand(1), N0.getOperand(2));
11928 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11929 N0.getValueType());
11930 }
11931
11932 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
11933 SDLoc DL(N);
11934 EVT N0VT = N0.getValueType();
11935 EVT N00VT = N0.getOperand(0).getValueType();
11936 if (SDValue SCC = SimplifySelectCC(
11937 DL, N0.getOperand(0), N0.getOperand(1),
11938 DAG.getBoolConstant(true, DL, N0VT, N00VT),
11939 DAG.getBoolConstant(false, DL, N0VT, N00VT),
11940 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11941 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11942 }
11943
11944 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
11945 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
  // NOTE(review): parts of this condition (orig 11946-11947) are missing.
11948 N0.hasOneUse()) {
11949 SDValue ShAmt = N0.getOperand(1);
11950 if (N0.getOpcode() == ISD::SHL) {
  // NOTE(review): the line defining InnerZExt (orig 11951) is missing here.
11952 // If the original shl may be shifting out bits, do not perform this
11953 // transformation.
11954 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11955 InnerZExt.getOperand(0).getValueSizeInBits();
11956 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11957 return SDValue();
11958 }
11959
11960 SDLoc DL(N);
11961
11962 // Ensure that the shift amount is wide enough for the shifted value.
11963 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11964 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11965
11966 return DAG.getNode(N0.getOpcode(), DL, VT,
11967 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11968 ShAmt);
11969 }
11970
  // NOTE(review): the condition defining NewVSel (orig 11971) is missing here.
11972 return NewVSel;
11973
11974 if (SDValue NewCtPop = widenCtPop(N, DAG))
11975 return NewCtPop;
11976
11977 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11978 return Res;
11979
11980 return SDValue();
11981}
11982
// Combine step for ISD::ANY_EXTEND nodes. Tries, in order: folding an aext of
// a constant, collapsing aext-of-ext, eliminating aext-of-truncate, forming
// extending loads, setcc folds, and a pop-count widening. Returns an empty
// SDValue when no combine applies.
// NOTE(review): this excerpt is a doc-site extraction with several original
// source lines missing (declaration lines of some locals and parts of some
// multi-line conditions); comments below describe only the visible code.
11983SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11984 SDValue N0 = N->getOperand(0);
11985 EVT VT = N->getValueType(0);
11986
  // aext of a constant/build_vector folds to a new constant outright.
11987 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11988 return Res;
11989
11990 // fold (aext (aext x)) -> (aext x)
11991 // fold (aext (zext x)) -> (zext x)
11992 // fold (aext (sext x)) -> (sext x)
11993 if (N0.getOpcode() == ISD::ANY_EXTEND ||
11994 N0.getOpcode() == ISD::ZERO_EXTEND ||
  // NOTE(review): the SIGN_EXTEND alternative of this condition (orig 11995)
  // is missing from this excerpt.
11996 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11997
11998 // fold (aext (truncate (load x))) -> (aext (smaller load x))
11999 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
12000 if (N0.getOpcode() == ISD::TRUNCATE) {
  // NOTE(review): the line defining NarrowLoad (orig 12001, presumably a
  // reduceLoadWidth call) is missing from this excerpt.
12002 SDNode *oye = N0.getOperand(0).getNode();
12003 if (NarrowLoad.getNode() != N0.getNode()) {
12004 CombineTo(N0.getNode(), NarrowLoad);
12005 // CombineTo deleted the truncate, if needed, but not what's under it.
12006 AddToWorklist(oye);
12007 }
12008 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12009 }
12010 }
12011
12012 // fold (aext (truncate x))
12013 if (N0.getOpcode() == ISD::TRUNCATE)
12014 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
12015
12016 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
12017 // if the trunc is not free.
12018 if (N0.getOpcode() == ISD::AND &&
12019 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
12020 N0.getOperand(1).getOpcode() == ISD::Constant &&
  // NOTE(review): the isTruncateFree check line (orig 12021) is missing here.
12022 N0.getValueType())) {
12023 SDLoc DL(N);
12024 SDValue X = N0.getOperand(0).getOperand(0);
12025 X = DAG.getAnyExtOrTrunc(X, DL, VT);
  // NOTE(review): the line defining Mask (orig 12026, presumably the AND
  // constant widened to VT's width) is missing from this excerpt.
12027 return DAG.getNode(ISD::AND, DL, VT,
12028 X, DAG.getConstant(Mask, DL, VT));
12029 }
12030
12031 // fold (aext (load x)) -> (aext (truncate (extload x)))
12032 // None of the supported targets knows how to perform load and any_ext
12033 // on vectors in one instruction, so attempt to fold to zext instead.
12034 if (VT.isVector()) {
12035 // Try to simplify (zext (load x)).
12036 if (SDValue foldedExt =
12037 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
  // NOTE(review): the trailing argument line of this call (orig 12038) is
  // missing from this excerpt.
12039 return foldedExt;
12040 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
  // NOTE(review): part of this condition (orig 12041, presumably an
  // isUNINDEXEDLoad check) is missing from this excerpt.
12042 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12043 bool DoXform = true;
  // NOTE(review): the SetCCs declaration (orig 12044) is missing here.
12045 if (!N0.hasOneUse())
12046 DoXform =
  // NOTE(review): the ExtendUsesToFormExtLoad call line (orig 12047) is
  // missing from this excerpt.
12048 if (DoXform) {
  // NOTE(review): the lines defining LN0 and starting the getExtLoad call
  // (orig 12049-12050) are missing from this excerpt.
12051 LN0->getChain(), LN0->getBasePtr(),
12052 N0.getValueType(), LN0->getMemOperand());
  // NOTE(review): an ExtendSetCCUses call (orig 12053) is missing here.
12054 // If the load value is used only by N, replace it via CombineTo N.
12055 bool NoReplaceTrunc = N0.hasOneUse();
12056 CombineTo(N, ExtLoad);
  // Replace the load's chain result; if the load value itself has other
  // users, give them a truncate of the new extending load instead.
12057 if (NoReplaceTrunc) {
12058 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12059 recursivelyDeleteUnusedNodes(LN0);
12060 } else {
12061 SDValue Trunc =
  // NOTE(review): the getNode(ISD::TRUNCATE, ...) line (orig 12062) is
  // missing from this excerpt.
12063 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
12064 }
12065 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12066 }
12067 }
12068
12069 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
12070 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
12071 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
12072 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
12073 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
  // NOTE(review): the line defining LN0 (orig 12074) is missing here.
12075 ISD::LoadExtType ExtType = LN0->getExtensionType();
12076 EVT MemVT = LN0->getMemoryVT();
  // Keep the existing extension kind; just widen the load to VT directly.
12077 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
12078 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
12079 VT, LN0->getChain(), LN0->getBasePtr(),
12080 MemVT, LN0->getMemOperand());
12081 CombineTo(N, ExtLoad);
12082 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12083 recursivelyDeleteUnusedNodes(LN0);
12084 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12085 }
12086 }
12087
12088 if (N0.getOpcode() == ISD::SETCC) {
12089 // For vectors:
12090 // aext(setcc) -> vsetcc
12091 // aext(setcc) -> truncate(vsetcc)
12092 // aext(setcc) -> aext(vsetcc)
12093 // Only do this before legalize for now.
12094 if (VT.isVector() && !LegalOperations) {
12095 EVT N00VT = N0.getOperand(0).getValueType();
  // NOTE(review): a guard returning early (orig 12096) is missing here.
12097 return SDValue();
12098
12099 // We know that the # elements of the results is the same as the
12100 // # elements of the compare (and the # elements of the compare result
12101 // for that matter). Check to see that they are the same size. If so,
12102 // we know that the element size of the sext'd result matches the
12103 // element size of the compare operands.
12104 if (VT.getSizeInBits() == N00VT.getSizeInBits())
12105 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
12106 N0.getOperand(1),
12107 cast<CondCodeSDNode>(N0.getOperand(2))->get());
12108
12109 // If the desired elements are smaller or larger than the source
12110 // elements we can use a matching integer vector type and then
12111 // truncate/any extend
12112 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
12113 SDValue VsetCC =
  // NOTE(review): the first line of this getSetCC call (orig 12114) is
  // missing from this excerpt.
12115 N0.getOperand(1),
12116 cast<CondCodeSDNode>(N0.getOperand(2))->get());
12117 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
12118 }
12119
12120 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
12121 SDLoc DL(N);
12122 if (SDValue SCC = SimplifySelectCC(
12123 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
12124 DAG.getConstant(0, DL, VT),
12125 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12126 return SCC;
12127 }
12128
12129 if (SDValue NewCtPop = widenCtPop(N, DAG))
12130 return NewCtPop;
12131
12132 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12133 return Res;
12134
12135 return SDValue();
12136}
12137
// Combine step for ISD::AssertSext / ISD::AssertZext nodes: folds duplicate
// asserts, and hoists asserts across truncates so the stronger (smaller-VT)
// assertion survives. Returns an empty SDValue when no combine applies.
// NOTE(review): this excerpt is a doc-site extraction with a few original
// source lines missing (noted inline); comments describe only the visible
// code.
12138SDValue DAGCombiner::visitAssertExt(SDNode *N) {
12139 unsigned Opcode = N->getOpcode();
12140 SDValue N0 = N->getOperand(0);
12141 SDValue N1 = N->getOperand(1);
12142 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
12143
12144 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
12145 if (N0.getOpcode() == Opcode &&
12146 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
12147 return N0;
12148
12149 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12150 N0.getOperand(0).getOpcode() == Opcode) {
12151 // We have an assert, truncate, assert sandwich. Make one stronger assert
12152 // by asserting on the smallest asserted type to the larger source type.
12153 // This eliminates the later assert:
12154 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
12155 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
12156 SDValue BigA = N0.getOperand(0);
12157 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12158 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12159 "Asserting zero/sign-extended bits to a type larger than the "
12160 "truncated destination does not provide information");
12161
12162 SDLoc DL(N);
  // NOTE(review): the lines defining MinAssertVTVal (orig 12163-12164,
  // presumably selecting the smaller of AssertVT and BigA_AssertVT) are
  // missing from this excerpt.
12165 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12166 BigA.getOperand(0), MinAssertVTVal);
12167 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12168 }
12169
12170 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
12171 // than X. Just move the AssertZext in front of the truncate and drop the
12172 // AssertSExt.
12173 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
  // NOTE(review): part of this condition (orig 12174, presumably checking
  // that the inner node is an AssertSext) is missing from this excerpt.
12175 Opcode == ISD::AssertZext) {
12176 SDValue BigA = N0.getOperand(0);
12177 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12178 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12179 "Asserting zero/sign-extended bits to a type larger than the "
12180 "truncated destination does not provide information");
12181
  // Only hoist when the zext assertion is strictly stronger (narrower VT)
  // than the inner sext assertion.
12182 if (AssertVT.bitsLT(BigA_AssertVT)) {
12183 SDLoc DL(N);
12184 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12185 BigA.getOperand(0), N1);
12186 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12187 }
12188 }
12189
12190 return SDValue();
12191}
12192
// Combine step for ISD::AssertAlign nodes: merges stacked alignment asserts
// and sinks the assert through simple add/sub arithmetic so the operands are
// exposed to further combining. Returns an empty SDValue when no combine
// applies.
// NOTE(review): this excerpt is a doc-site extraction with some original
// source lines missing (noted inline); comments describe only the visible
// code.
12193SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
12194 SDLoc DL(N);
12195
12196 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
12197 SDValue N0 = N->getOperand(0);
12198
12199 // Fold (assertalign (assertalign x, AL0), AL1) ->
12200 // (assertalign x, max(AL0, AL1))
12201 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
12202 return DAG.getAssertAlign(DL, N0.getOperand(0),
12203 std::max(AL, AAN->getAlign()));
12204
12205 // In rare cases, there are trivial arithmetic ops in source operands. Sink
12206 // this assert down to source operands so that those arithmetic ops could be
12207 // exposed to the DAG combining.
12208 switch (N0.getOpcode()) {
12209 default:
12210 break;
12211 case ISD::ADD:
12212 case ISD::SUB: {
12213 unsigned AlignShift = Log2(AL);
12214 SDValue LHS = N0.getOperand(0);
12215 SDValue RHS = N0.getOperand(1);
  // NOTE(review): the guarding conditions for re-asserting alignment on
  // LHS/RHS (orig 12216-12219 and 12221, presumably comparing each operand's
  // known trailing-zero count against AlignShift) are missing from this
  // excerpt.
12220 LHS = DAG.getAssertAlign(DL, LHS, AL);
12222 RHS = DAG.getAssertAlign(DL, RHS, AL);
  // Rebuild the arithmetic with the alignment asserted on its operands.
12223 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
12224 }
12225 break;
12226 }
12227 }
12228
12229 return SDValue();
12230}
12231
12232/// If the result of a load is shifted/masked/truncated to an effectively
12233/// narrower type, try to transform the load to a narrower type and/or
12234/// use an extending load.
12235SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
12236 unsigned Opc = N->getOpcode();
12237
12239 SDValue N0 = N->getOperand(0);
12240 EVT VT = N->getValueType(0);
12241 EVT ExtVT = VT;
12242
12243 // This transformation isn't valid for vector loads.
12244 if (VT.isVector())
12245 return SDValue();
12246
12247 // The ShAmt variable is used to indicate that we've consumed a right
12248 // shift. I.e. we want to narrow the width of the load by skipping to load the
12249 // ShAmt least significant bits.
12250 unsigned ShAmt = 0;
12251 // A special case is when the least significant bits from the load are masked
12252 // away, but using an AND rather than a right shift. HasShiftedOffset is used
12253 // to indicate that the narrowed load should be left-shifted ShAmt bits to get
12254 // the result.
12255 bool HasShiftedOffset = false;
12256 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
12257 // extended to VT.
12258 if (Opc == ISD::SIGN_EXTEND_INREG) {
12259 ExtType = ISD::SEXTLOAD;
12260 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12261 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
12262 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
12263 // value, or it may be shifting a higher subword, half or byte into the
12264 // lowest bits.
12265
12266 // Only handle shift with constant shift amount, and the shiftee must be a
12267 // load.
12268 auto *LN = dyn_cast<LoadSDNode>(N0);
12269 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12270 if (!N1C || !LN)
12271 return SDValue();
12272 // If the shift amount is larger than the memory type then we're not
12273 // accessing any of the loaded bytes.
12274 ShAmt = N1C->getZExtValue();
12275 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
12276 if (MemoryWidth <= ShAmt)
12277 return SDValue();
12278 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
12279 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
12280 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12281 // If original load is a SEXTLOAD then we can't simply replace it by a
12282 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
12283 // followed by a ZEXT, but that is not handled at the moment). Similarly if
12284 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
12285 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
12286 LN->getExtensionType() == ISD::ZEXTLOAD) &&
12287 LN->getExtensionType() != ExtType)
12288 return SDValue();
12289 } else if (Opc == ISD::AND) {
12290 // An AND with a constant mask is the same as a truncate + zero-extend.
12291 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
12292 if (!AndC)
12293 return SDValue();
12294
12295 const APInt &Mask = AndC->getAPIntValue();
12296 unsigned ActiveBits = 0;
12297 if (Mask.isMask()) {
12298 ActiveBits = Mask.countTrailingOnes();
12299 } else if (Mask.isShiftedMask()) {
12300 ShAmt = Mask.countTrailingZeros();
12301 APInt ShiftedMask = Mask.lshr(ShAmt);
12302 ActiveBits = ShiftedMask.countTrailingOnes();
12303 HasShiftedOffset = true;
12304 } else
12305 return SDValue();
12306
12307 ExtType = ISD::ZEXTLOAD;
12309 }
12310
12311 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
12312 // a right shift. Here we redo some of those checks, to possibly adjust the
12313 // ExtVT even further based on "a masking AND". We could also end up here for
12314 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
12315 // need to be done here as well.
12316 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
12317 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
12318 // Bail out when the SRL has more than one use. This is done for historical
12319 // (undocumented) reasons. Maybe intent was to guard the AND-masking below
12320 // check below? And maybe it could be non-profitable to do the transform in
12321 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
12322 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
12323 if (!SRL.hasOneUse())
12324 return SDValue();
12325
12326 // Only handle shift with constant shift amount, and the shiftee must be a
12327 // load.
12328 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
12329 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
12330 if (!SRL1C || !LN)
12331 return SDValue();
12332
12333 // If the shift amount is larger than the input type then we're not
12334 // accessing any of the loaded bytes. If the load was a zextload/extload
12335 // then the result of the shift+trunc is zero/undef (handled elsewhere).
12336 ShAmt = SRL1C->getZExtValue();
12337 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
12338 if (ShAmt >= MemoryWidth)
12339 return SDValue();
12340
12341 // Because a SRL must be assumed to *need* to zero-extend the high bits
12342 // (as opposed to anyext the high bits), we can't combine the zextload
12343 // lowering of SRL and an sextload.
12344 if (LN->getExtensionType() == ISD::SEXTLOAD)
12345 return SDValue();
12346
12347 // Avoid reading outside the memory accessed by the original load (could
12348 // happened if we only adjust the load base pointer by ShAmt). Instead we
12349 // try to narrow the load even further. The typical scenario here is:
12350 // (i64 (truncate (i96 (srl (load x), 64)))) ->
12351 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
12352 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
12353 // Don't replace sextload by zextload.
12354 if (ExtType == ISD::SEXTLOAD)
12355 return SDValue();
12356 // Narrow the load.
12357 ExtType = ISD::ZEXTLOAD;
12358 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12359 }
12360
12361 // If the SRL is only used by a masking AND, we may be able to adjust
12362 // the ExtVT to make the AND redundant.
12363 SDNode *Mask = *(SRL->use_begin());
12364 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
12365 isa<ConstantSDNode>(Mask->getOperand(1))) {
12366 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
12367 if (ShiftMask.isMask()) {
12369 ShiftMask.countTrailingOnes());
12370 // If the mask is smaller, recompute the type.
12371 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
12372 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
12373 ExtVT = MaskedVT;
12374 }
12375 }
12376
12377 N0 = SRL.getOperand(0);
12378 }
12379
12380 // If the load is shifted left (and the result isn't shifted back right), we
12381 // can fold a truncate through the shift. The typical scenario is that N
12382 // points at a TRUNCATE here so the attempted fold is:
12383 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
12384 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
12385 unsigned ShLeftAmt = 0;
12386 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12387 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
12389 ShLeftAmt = N01->getZExtValue();
12390 N0 = N0.getOperand(0);
12391 }
12392 }
12393
12394 // If we haven't found a load, we can't narrow it.
12395 if (!isa<LoadSDNode>(N0))
12396 return SDValue();
12397
12399 // Reducing the width of a volatile load is illegal. For atomics, we may be
12400 // able to reduce the width provided we never widen again. (see D66309)
12401 if (!LN0->isSimple() ||
12402 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
12403 return SDValue();
12404
12405 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
12406 unsigned LVTStoreBits =
12407 LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
12408 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
12409 return LVTStoreBits - EVTStoreBits - ShAmt;
12410 };
12411
12412 // We need to adjust the pointer to the load by ShAmt bits in order to load
12413 // the correct bytes.
12414 unsigned PtrAdjustmentInBits =
12415 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
12416
12418 Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
12419 SDLoc DL(LN0);
12420 // The original load itself didn't wrap, so an offset within it doesn't.
12421 SDNodeFlags Flags;
12422 Flags.setNoUnsignedWrap(true);
12423 SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
12424 TypeSize::Fixed(PtrOff), DL, Flags);
12425 AddToWorklist(NewPtr.getNode());
12426
12427 SDValue Load;
12428 if (ExtType == ISD::NON_EXTLOAD)
12429 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
12430 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12431 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12432 else
12433 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
12434 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
12435 NewAlign, LN0->getMemOperand()->getFlags(),
12436 LN0->getAAInfo());
12437
12438 // Replace the old load's chain with the new load's chain.
12439 WorklistRemover DeadNodes(*this);
12440 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12441
12442 // Shift the result left, if we've swallowed a left shift.
12444 if (ShLeftAmt != 0) {
12445 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
12446 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
12447 ShImmTy = VT;
12448 // If the shift amount is as large as the result size (but, presumably,
12449 // no larger than the source) then the useful bits of the result are
12450 // zero; we can't simply return the shortened shift, because the result
12451 // of that operation is undefined.
12452 if (ShLeftAmt >= VT.getScalarSizeInBits())
12453 Result = DAG.getConstant(0, DL, VT);
12454 else
12455 Result = DAG.getNode(ISD::SHL, DL, VT,
12456 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
12457 }
12458
12459 if (HasShiftedOffset) {
12460 // We're using a shifted mask, so the load now has an offset. This means
12461 // that data has been loaded into the lower bytes than it would have been
12462 // before, so we need to shl the loaded data into the correct position in the
12463 // register.
12464 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12465 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12466 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12467 }
12468
12469 // Return the new loaded value.
12470 return Result;
12471}
12472
// Combine step for ISD::SIGN_EXTEND_INREG: tries, in order, to fold the
// in-register sign extension into constants, an already-sufficiently-signed
// input, adjacent sext/aext/zext and *_extend_vector_inreg nodes, SRL shifts,
// and (masked) loads / gathers. Returns the replacement SDValue, or an empty
// SDValue when no fold applies.
// NOTE(review): this listing was extracted from a rendered (doxygen-style)
// view; gaps in the embedded line numbers (e.g. 12486, 12507, 12515-12517,
// 12528, 12542, 12547, 12552, 12575, 12580-12581, 12592, 12597-12598, 12609,
// 12612-12613, 12627, 12631, 12644) mark source lines -- mostly `if`
// conditions and local declarations -- dropped by extraction. Consult the
// upstream file before relying on this copy.
12473 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12474 SDValue N0 = N->getOperand(0);
12475 SDValue N1 = N->getOperand(1);
12476 EVT VT = N->getValueType(0);
12477 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12478 unsigned VTBits = VT.getScalarSizeInBits();
12479 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12480
12481 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
12482 if (N0.isUndef())
12483 return DAG.getConstant(0, SDLoc(N), VT);
12484
12485 // fold (sext_in_reg c1) -> c1
// NOTE(review): the guarding if-condition (source line 12486) was dropped by
// extraction; the return below is conditional in the upstream source.
12487 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12488
12489 // If the input is already sign extended, just drop the extension.
12490 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
12491 return N0;
12492
12493 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12494 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12495 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12496 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12497 N1);
12498
12499 // fold (sext_in_reg (sext x)) -> (sext x)
12500 // fold (sext_in_reg (aext x)) -> (sext x)
12501 // if x is small enough or if we know that x has more than 1 sign bit and the
12502 // sign_extend_inreg is extending from one of them.
12503 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12504 SDValue N00 = N0.getOperand(0);
12505 unsigned N00Bits = N00.getScalarValueSizeInBits();
12506 if ((N00Bits <= ExtVTBits ||
12508 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12509 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12510 }
12511
12512 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12513 // if x is small enough or if we know that x has more than 1 sign bit and the
12514 // sign_extend_inreg is extending from one of them.
// NOTE(review): the enclosing if-condition and several sub-conditions
// (source lines 12515-12517, 12520, 12522-12523, 12526, 12528) are missing.
12518 SDValue N00 = N0.getOperand(0);
12519 unsigned N00Bits = N00.getScalarValueSizeInBits();
12521 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12524 if ((N00Bits == ExtVTBits ||
12525 (!IsZext && (N00Bits < ExtVTBits ||
12527 (!LegalOperations ||
12529 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12530 }
12531
12532 // fold (sext_in_reg (zext x)) -> (sext x)
12533 // iff we are extending the source sign bit.
12534 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12535 SDValue N00 = N0.getOperand(0);
12536 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12537 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12538 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
12539 }
12540
12541 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12543 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12544
12545 // fold operands of sext_in_reg based on knowledge that the top bits are not
12546 // demanded.
12548 return SDValue(N, 0);
12549
12550 // fold (sext_in_reg (load x)) -> (smaller sextload x)
12551 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
// NOTE(review): guard at dropped line 12552 -- presumably an
// `if (SDValue NarrowLoad = ...)` binding NarrowLoad; confirm upstream.
12553 return NarrowLoad;
12554
12555 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12556 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12557 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12558 if (N0.getOpcode() == ISD::SRL) {
12559 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12560 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12561 // We can turn this into an SRA iff the input to the SRL is already sign
12562 // extended enough.
12563 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12564 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12565 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12566 N0.getOperand(1));
12567 }
12568 }
12569
12570 // fold (sext_inreg (extload x)) -> (sextload x)
12571 // If sextload is not supported by target, we can only do the combine when
12572 // load has one use. Doing otherwise can block folding the extload with other
12573 // extends that the target does support.
12574 if (ISD::isEXTLoad(N0.getNode()) &&
12576 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12577 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12578 N0.hasOneUse()) ||
12579 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
// NOTE(review): LN0/ExtLoad declarations at dropped lines 12580-12581.
12582 LN0->getChain(),
12583 LN0->getBasePtr(), ExtVT,
12584 LN0->getMemOperand());
12585 CombineTo(N, ExtLoad);
12586 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12587 AddToWorklist(ExtLoad.getNode());
12588 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12589 }
12590
12591 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12593 N0.hasOneUse() &&
12594 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12595 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12596 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12599 LN0->getChain(),
12600 LN0->getBasePtr(), ExtVT,
12601 LN0->getMemOperand());
12602 CombineTo(N, ExtLoad);
12603 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12604 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12605 }
12606
12607 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12608 // ignore it if the masked load is already sign extended
12610 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12611 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12614 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12615 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12616 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12617 CombineTo(N, ExtMaskedLoad);
12618 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12619 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12620 }
12621 }
12622
12623 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12624 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12625 if (SDValue(GN0, 0).hasOneUse() &&
12626 ExtVT == GN0->getMemoryVT() &&
12628 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
12629 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
12630
12632 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12633 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12634
12635 CombineTo(N, ExtLoad);
12636 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12637 AddToWorklist(ExtLoad.getNode());
12638 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12639 }
12640 }
12641
12642 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12643 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12645 N0.getOperand(1), false))
12646 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12647 }
12648
12649 return SDValue();
12650}
12651
// Combine step shared by the *_EXTEND_VECTOR_INREG opcodes: folds
// extend-in-reg of undef to zero, folds extends of constants, and otherwise
// simplifies based on demanded vector elements. Returns the replacement
// value, N itself (when an in-place simplification occurred), or an empty
// SDValue.
// NOTE(review): extracted from a rendered view; the if-condition at source
// line 12663 (guarding the `return SDValue(N, 0)` below -- presumably a
// demanded-elements simplification call) was dropped; confirm upstream.
12652 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12653 SDValue N0 = N->getOperand(0);
12654 EVT VT = N->getValueType(0);
12655
12656 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12657 if (N0.isUndef())
12658 return DAG.getConstant(0, SDLoc(N), VT);
12659
12660 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12661 return Res;
12662
12664 return SDValue(N, 0);
12665
12666 return SDValue();
12667}
12668
// Combine step for ISD::TRUNCATE: collapses truncate-of-truncate and
// truncate-of-extend chains, narrows extracts, selects, shifts, build
// vectors, loads, concats, adde/addcarry and binary ops through the
// truncate, and simplifies via demanded bits. Returns the replacement
// value or an empty SDValue when no fold applies.
// NOTE(review): extracted from a rendered (doxygen-style) view; gaps in the
// embedded line numbers (e.g. 12684, 12729, 12733, 12738, 12786, 12800,
// 12802-12816, 12831, 12839, 12845, 12860, 12886, 12909, 12919, 12947,
// 12953, 12986-12987) mark source lines -- mostly `if` conditions and local
// declarations -- dropped by extraction. Consult the upstream file before
// relying on this copy.
12669 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12670 SDValue N0 = N->getOperand(0);
12671 EVT VT = N->getValueType(0);
12672 EVT SrcVT = N0.getValueType();
12673 bool isLE = DAG.getDataLayout().isLittleEndian();
12674
12675 // noop truncate
12676 if (SrcVT == VT)
12677 return N0;
12678
12679 // fold (truncate (truncate x)) -> (truncate x)
12680 if (N0.getOpcode() == ISD::TRUNCATE)
12681 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12682
12683 // fold (truncate c1) -> c1
// NOTE(review): guarding if-condition at dropped line 12684.
12685 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12686 if (C.getNode() != N)
12687 return C;
12688 }
12689
12690 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12691 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12692 N0.getOpcode() == ISD::SIGN_EXTEND ||
12693 N0.getOpcode() == ISD::ANY_EXTEND) {
12694 // if the source is smaller than the dest, we still need an extend.
12695 if (N0.getOperand(0).getValueType().bitsLT(VT))
12696 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12697 // if the source is larger than the dest, than we just need the truncate.
12698 if (N0.getOperand(0).getValueType().bitsGT(VT))
12699 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12700 // if the source and dest are the same type, we can drop both the extend
12701 // and the truncate.
12702 return N0.getOperand(0);
12703 }
12704
12705 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12706 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12707 return SDValue();
12708
12709 // Fold extract-and-trunc into a narrow extract. For example:
12710 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12711 // i32 y = TRUNCATE(i64 x)
12712 // -- becomes --
12713 // v16i8 b = BITCAST (v2i64 val)
12714 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12715 //
12716 // Note: We only run this optimization after type legalization (which often
12717 // creates this pattern) and before operation legalization after which
12718 // we need to be more careful about the vector instructions that we generate.
12719 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12720 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12721 EVT VecTy = N0.getOperand(0).getValueType();
12722 EVT ExTy = N0.getValueType();
12723 EVT TrTy = N->getValueType(0);
12724
12725 auto EltCnt = VecTy.getVectorElementCount();
12726 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12727 auto NewEltCnt = EltCnt * SizeRatio;
12728
// NOTE(review): NVT declaration at dropped line 12729.
12730 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12731
12732 SDValue EltNo = N0->getOperand(1);
12734 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12735 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12736
12737 SDLoc DL(N);
12739 DAG.getBitcast(NVT, N0.getOperand(0)),
12740 DAG.getVectorIdxConstant(Index, DL));
12741 }
12742 }
12743
12744 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12745 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12746 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12747 TLI.isTruncateFree(SrcVT, VT)) {
12748 SDLoc SL(N0);
12749 SDValue Cond = N0.getOperand(0);
12750 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12751 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12752 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12753 }
12754 }
12755
12756 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
12757 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12758 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12759 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12760 SDValue Amt = N0.getOperand(1);
12761 KnownBits Known = DAG.computeKnownBits(Amt);
12762 unsigned Size = VT.getScalarSizeInBits();
12763 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
12764 SDLoc SL(N);
12765 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12766
12767 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12768 if (AmtVT != Amt.getValueType()) {
12769 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12770 AddToWorklist(Amt.getNode());
12771 }
12772 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12773 }
12774 }
12775
12776 if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12777 return V;
12778
12779 // Attempt to pre-truncate BUILD_VECTOR sources.
12780 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12781 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12782 // Avoid creating illegal types if running after type legalizer.
12783 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12784 SDLoc DL(N);
12785 EVT SVT = VT.getScalarType();
12787 for (const SDValue &Op : N0->op_values()) {
12788 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12789 TruncOps.push_back(TruncOp);
12790 }
12791 return DAG.getBuildVector(VT, DL, TruncOps);
12792 }
12793
12794 // Fold a series of buildvector, bitcast, and truncate if possible.
12795 // For example fold
12796 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12797 // (2xi32 (buildvector x, y)).
12798 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12799 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12801 N0.getOperand(0).hasOneUse()) {
12803 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12805
12806 // Check that the element types match.
12808 // Now we only need to compute the offset of the truncated elements.
12809 unsigned BuildVecNumElts = BuildVect.getNumOperands();
12810 unsigned TruncVecNumElts = VT.getVectorNumElements();
12812
12814 "Invalid number of elements");
12815
12817 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12818 Opnds.push_back(BuildVect.getOperand(i));
12819
12820 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12821 }
12822 }
12823
12824 // See if we can simplify the input to this truncate through knowledge that
12825 // only the low bits are being used.
12826 // For example "trunc (or (shl x, 8), y)" // -> trunc y
12827 // Currently we only perform this optimization on scalars because vectors
12828 // may have different active low bits.
12829 if (!VT.isVector()) {
12830 APInt Mask =
12832 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12833 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12834 }
12835
12836 // fold (truncate (load x)) -> (smaller load x)
12837 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12838 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
// NOTE(review): `if (SDValue Reduced = ...)` guard at dropped line 12839.
12840 return Reduced;
12841
12842 // Handle the case where the load remains an extending load even
12843 // after truncation.
12844 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12846 if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12847 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12848 VT, LN0->getChain(), LN0->getBasePtr(),
12849 LN0->getMemoryVT(),
12850 LN0->getMemOperand());
12851 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12852 return NewLoad;
12853 }
12854 }
12855 }
12856
12857 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
12858 // where ... are all 'undef'.
12859 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12861 SDValue V;
12862 unsigned Idx = 0;
12863 unsigned NumDefs = 0;
12864
12865 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12866 SDValue X = N0.getOperand(i);
12867 if (!X.isUndef()) {
12868 V = X;
12869 Idx = i;
12870 NumDefs++;
12871 }
12872 // Stop if more than one members are non-undef.
12873 if (NumDefs > 1)
12874 break;
12875
12876 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12878 X.getValueType().getVectorElementCount()));
12879 }
12880
12881 if (NumDefs == 0)
12882 return DAG.getUNDEF(VT);
12883
12884 if (NumDefs == 1) {
12885 assert(V.getNode() && "The single defined operand is empty!");
12887 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12888 if (i != Idx) {
12889 Opnds.push_back(DAG.getUNDEF(VTs[i]));
12890 continue;
12891 }
12892 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12893 AddToWorklist(NV.getNode());
12894 Opnds.push_back(NV);
12895 }
12896 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12897 }
12898 }
12899
12900 // Fold truncate of a bitcast of a vector to an extract of the low vector
12901 // element.
12902 //
12903 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12904 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12905 SDValue VecSrc = N0.getOperand(0);
12906 EVT VecSrcVT = VecSrc.getValueType();
12907 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12908 (!LegalOperations ||
12910 SDLoc SL(N);
12911
12912 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12913 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12914 DAG.getVectorIdxConstant(Idx, SL));
12915 }
12916 }
12917
12918 // Simplify the operands using demanded-bits information.
// NOTE(review): SimplifyDemandedBits-style guard at dropped line 12919.
12920 return SDValue(N, 0);
12921
12922 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12923 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12924 // When the adde's carry is not used.
12925 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12926 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12927 // We only do for addcarry before legalize operation
12928 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12929 TLI.isOperationLegal(N0.getOpcode(), VT))) {
12930 SDLoc SL(N);
12931 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12932 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12933 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12934 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12935 }
12936
12937 // fold (truncate (extract_subvector(ext x))) ->
12938 // (extract_subvector x)
12939 // TODO: This can be generalized to cover cases where the truncate and extract
12940 // do not fully cancel each other out.
12941 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12942 SDValue N00 = N0.getOperand(0);
12943 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12944 N00.getOpcode() == ISD::ZERO_EXTEND ||
12945 N00.getOpcode() == ISD::ANY_EXTEND) {
12946 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12948 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12949 N00.getOperand(0), N0.getOperand(1));
12950 }
12951 }
12952
12954 return NewVSel;
12955
12956 // Narrow a suitable binary operation with a non-opaque constant operand by
12957 // moving it ahead of the truncate. This is limited to pre-legalization
12958 // because targets may prefer a wider type during later combines and invert
12959 // this transform.
12960 switch (N0.getOpcode()) {
12961 case ISD::ADD:
12962 case ISD::SUB:
12963 case ISD::MUL:
12964 case ISD::AND:
12965 case ISD::OR:
12966 case ISD::XOR:
12967 if (!LegalOperations && N0.hasOneUse() &&
12968 (isConstantOrConstantVector(N0.getOperand(0), true) ||
12969 isConstantOrConstantVector(N0.getOperand(1), true))) {
12970 // TODO: We already restricted this to pre-legalization, but for vectors
12971 // we are extra cautious to not create an unsupported operation.
12972 // Target-specific changes are likely needed to avoid regressions here.
12973 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12974 SDLoc DL(N);
12975 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12976 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12977 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12978 }
12979 }
12980 break;
12981 case ISD::USUBSAT:
12982 // Truncate the USUBSAT only if LHS is a known zero-extension, its not
12983 // enough to know that the upper bits are zero we must ensure that we don't
12984 // introduce an extra truncate.
12985 if (!LegalOperations && N0.hasOneUse() &&
12988 VT.getScalarSizeInBits() &&
12989 hasOperation(N0.getOpcode(), VT)) {
12990 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12991 DAG, SDLoc(N));
12992 }
12993 break;
12994 }
12995
12996 return SDValue();
12997}
12998
12999static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
13000 SDValue Elt = N->getOperand(i);
13001 if (Elt.getOpcode() != ISD::MERGE_VALUES)
13002 return Elt.getNode();
13003 return Elt.getOperand(Elt.getResNo()).getNode();
13004}
13005
13006 /// build_pair (load, load) -> load
13007 /// if load locations are consecutive.
// Returns the merged wide load of type VT, or an empty SDValue if the pair
// cannot be combined (non-simple/extending loads, multiple uses, differing
// address spaces, illegal or slow wide access).
// NOTE(review): extracted from a rendered view; the declarations of LD1/LD2
// at dropped source lines 13011-13012 are missing -- presumably they are
// initialized from getBuildPairElt(N, 0) / getBuildPairElt(N, 1); likewise
// part of the profitability condition at dropped line 13029 (presumably the
// consecutive-load check). Confirm against the upstream file.
13008 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
13009 assert(N->getOpcode() == ISD::BUILD_PAIR);
13010
13013
13014 // A BUILD_PAIR is always having the least significant part in elt 0 and the
13015 // most significant part in elt 1. So when combining into one large load, we
13016 // need to consider the endianness.
13017 if (DAG.getDataLayout().isBigEndian())
13018 std::swap(LD1, LD2);
13019
13020 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
13021 !LD1->hasOneUse() || !LD2->hasOneUse() ||
13022 LD1->getAddressSpace() != LD2->getAddressSpace())
13023 return SDValue();
13024
13025 bool LD1Fast = false;
13026 EVT LD1VT = LD1->getValueType(0);
13027 unsigned LD1Bytes = LD1VT.getStoreSize();
13028 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
13030 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
13031 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
13032 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
13033 LD1->getPointerInfo(), LD1->getAlign());
13034
13035 return SDValue();
13036}
13037
13038static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
13039 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
13040 // and Lo parts; on big-endian machines it doesn't.
13041 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
13042}
13043
// Fold an FP-typed bitcast of integer sign-bit logic back into FP ops:
//   bitcast (and (bitcast X), ~signmask) -> fabs X
//   bitcast (xor (bitcast X),  signmask) -> fneg X
//   bitcast (or  (bitcast X),  signmask) -> fneg (fabs X)
// Returns the folded FP value or an empty SDValue.
// NOTE(review): extracted from a rendered view; the first line of the
// signature (dropped source line 13044, presumably
// `static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,`) is
// missing, as are the FPOpcode assignments at dropped lines 13063/13067/
// 13071 (per the fold comments below, presumably FABS / FNEG / FABS
// respectively) and the LogicOp1 constant-splat lookup at 13083. Confirm
// against the upstream file.
13045 const TargetLowering &TLI) {
13046 // If this is not a bitcast to an FP type or if the target doesn't have
13047 // IEEE754-compliant FP logic, we're done.
13048 EVT VT = N->getValueType(0);
13049 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
13050 return SDValue();
13051
13052 // TODO: Handle cases where the integer constant is a different scalar
13053 // bitwidth to the FP.
13054 SDValue N0 = N->getOperand(0);
13055 EVT SourceVT = N0.getValueType();
13056 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
13057 return SDValue();
13058
13059 unsigned FPOpcode;
13060 APInt SignMask;
13061 switch (N0.getOpcode()) {
13062 case ISD::AND:
13064 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
13065 break;
13066 case ISD::XOR:
13068 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13069 break;
13070 case ISD::OR:
13072 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13073 break;
13074 default:
13075 return SDValue();
13076 }
13077
13078 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
13079 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
13080 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
13081 // fneg (fabs X)
13082 SDValue LogicOp0 = N0.getOperand(0);
13084 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
13085 LogicOp0.getOpcode() == ISD::BITCAST &&
13086 LogicOp0.getOperand(0).getValueType() == VT) {
13087 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
13089 if (N0.getOpcode() == ISD::OR)
13090 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
13091 return FPOp;
13092 }
13093
13094 return SDValue();
13095}
13096
// Combine an ISD::BITCAST node. Tries, in order: folding away casts of
// undef, constant build_vectors and scalar constants; collapsing chained
// bitcasts; rewriting a bitcast-of-load into a load of the new type;
// sign-bit logic folds (via foldBitcastedFPLogic); expanding
// bitcast(fneg/fabs/fcopysign) into integer bit operations (with a special
// ppc_fp128 path); merging build_pair loads; and stripping double bitcasts
// around vector shuffles.
// NOTE(review): this extract is missing several original source lines
// (13116-13117, 13126, 13143, 13152, 13191-13192, 13198-13200, 13206-13207,
// 13232, 13235, 13251, 13266-13268, 13271-13272, 13275-13276, 13296,
// 13305-13307, 13316, 13324-13325, 13330-13331, 13336). Names such as LN0,
// FlipBit, FlipBits, IntXVT, XorResult64, SVN, SV0 and SV1 are used below
// without a visible declaration — restore the dropped lines from upstream
// LLVM before compiling.
13097SDValue DAGCombiner::visitBITCAST(SDNode *N) {
13098 SDValue N0 = N->getOperand(0);
13099 EVT VT = N->getValueType(0);
13100
// bitcast of undef is just undef of the destination type.
13101 if (N0.isUndef())
13102 return DAG.getUNDEF(VT);
13103
13104 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
13105 // Only do this before legalize types, unless both types are integer and the
13106 // scalar type is legal. Only do this before legalize ops, since the target
13107 // maybe depending on the bitcast.
13108 // First check to see if this is all constant.
13109 // TODO: Support FP bitcasts after legalize types.
13110 if (VT.isVector() &&
13111 (!LegalTypes ||
13112 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
13113 TLI.isTypeLegal(VT.getVectorElementType()))) &&
13114 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
13115 cast<BuildVectorSDNode>(N0)->isConstant())
// NOTE(review): the fold's body (lines 13116-13117, presumably a return of
// ConstantFoldBITCASTofBUILD_VECTOR) was dropped by the extraction.
13118
13119 // If the input is a constant, let getNode fold it.
13120 if (isIntOrFPConstant(N0)) {
13121 // If we can't allow illegal operations, we need to check that this is just
13122 // a fp -> int or int -> conversion and that the resulting operation will
13123 // be legal.
13124 if (!LegalOperations ||
13125 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
// NOTE(review): line 13126 (the legality check closing this clause, plus the
// `||` joining the two alternatives) is missing from this extract.
13127 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
13128 TLI.isOperationLegal(ISD::Constant, VT))) {
13129 SDValue C = DAG.getBitcast(VT, N0);
// Guard against getBitcast returning N itself (no progress).
13130 if (C.getNode() != N)
13131 return C;
13132 }
13133 }
13134
13135 // (conv (conv x, t1), t2) -> (conv x, t2)
13136 if (N0.getOpcode() == ISD::BITCAST)
13137 return DAG.getBitcast(VT, N0.getOperand(0));
13138
13139 // fold (conv (load x)) -> (load (conv*)x)
13140 // If the resultant load doesn't need a higher alignment than the original!
13141 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13142 // Do not remove the cast if the types differ in endian layout.
// NOTE(review): line 13143 (presumably the hasBigEndianPartOrdering check on
// the source type, compared against the line below) is missing here.
13144 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
13145 // If the load is volatile, we only want to change the load type if the
13146 // resulting load is legal. Otherwise we might increase the number of
13147 // memory accesses. We don't care if the original type was legal or not
13148 // as we assume software couldn't rely on the number of accesses of an
13149 // illegal type.
13150 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
13151 TLI.isOperationLegal(ISD::LOAD, VT))) {
// NOTE(review): line 13152, the declaration of LN0 (presumably
// `LoadSDNode *LN0 = cast<LoadSDNode>(N0);`), is missing from this extract.
13153
13154 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
13155 *LN0->getMemOperand())) {
13156 SDValue Load =
13157 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
13158 LN0->getPointerInfo(), LN0->getAlign(),
13159 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
// Re-wire the old load's chain users onto the new load's chain result.
13160 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13161 return Load;
13162 }
13163 }
13164
13165 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
13166 return V;
13167
13168 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
13169 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
13170 //
13171 // For ppc_fp128:
13172 // fold (bitcast (fneg x)) ->
13173 // flipbit = signbit
13174 // (xor (bitcast x) (build_pair flipbit, flipbit))
13175 //
13176 // fold (bitcast (fabs x)) ->
13177 // flipbit = (and (extract_element (bitcast x), 0), signbit)
13178 // (xor (bitcast x) (build_pair flipbit, flipbit))
13179 // This often reduces constant pool loads.
13180 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
13181 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
13182 N0.getNode()->hasOneUse() && VT.isInteger() &&
13183 !VT.isVector() && !N0.getValueType().isVector()) {
13184 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
13185 AddToWorklist(NewConv.getNode());
13186
13187 SDLoc DL(N);
// Special-case ppc_fp128 (a pair of doubles): only the high i64 half
// carries the sign bit, so flip it via a build_pair of flip masks.
13188 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13189 assert(VT.getSizeInBits() == 128);
13190 SDValue SignBit = DAG.getConstant(
// NOTE(review): lines 13191-13192 (the constant's value/type arguments and
// the declaration of FlipBit) are missing from this extract.
13193 if (N0.getOpcode() == ISD::FNEG) {
13194 FlipBit = SignBit;
13195 AddToWorklist(FlipBit.getNode());
13196 } else {
13197 assert(N0.getOpcode() == ISD::FABS);
13198 SDValue Hi =
// NOTE(review): lines 13199-13200 (the EXTRACT_ELEMENT of the high half
// feeding Hi) are missing from this extract.
13201 SDLoc(NewConv)));
13202 AddToWorklist(Hi.getNode());
13203 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
13204 AddToWorklist(FlipBit.getNode());
13205 }
// NOTE(review): lines 13206-13207 (the BUILD_PAIR constructing FlipBits
// from FlipBit) are missing from this extract.
13208 AddToWorklist(FlipBits.getNode());
13209 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
13210 }
// Scalar integer case: fneg flips the sign bit, fabs clears it.
13211 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13212 if (N0.getOpcode() == ISD::FNEG)
13213 return DAG.getNode(ISD::XOR, DL, VT,
13214 NewConv, DAG.getConstant(SignBit, DL, VT));
13215 assert(N0.getOpcode() == ISD::FABS);
13216 return DAG.getNode(ISD::AND, DL, VT,
13217 NewConv, DAG.getConstant(~SignBit, DL, VT));
13218 }
13219
13220 // fold (bitconvert (fcopysign cst, x)) ->
13221 // (or (and (bitconvert x), sign), (and cst, (not sign)))
13222 // Note that we don't handle (copysign x, cst) because this can always be
13223 // folded to an fneg or fabs.
13224 //
13225 // For ppc_fp128:
13226 // fold (bitcast (fcopysign cst, x)) ->
13227 // flipbit = (and (extract_element
13228 // (xor (bitcast cst), (bitcast x)), 0),
13229 // signbit)
13230 // (xor (bitcast cst) (build_pair flipbit, flipbit))
13231 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
// NOTE(review): line 13232 (an additional condition, presumably requiring
// the first operand to be a constant FP) is missing here.
13233 VT.isInteger() && !VT.isVector()) {
13234 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
// NOTE(review): line 13235, the declaration of IntXVT (presumably
// `EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);`),
// is missing from this extract.
13236 if (isTypeLegal(IntXVT)) {
13237 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
13238 AddToWorklist(X.getNode());
13239
13240 // If X has a different width than the result/lhs, sext it or truncate it.
13241 unsigned VTWidth = VT.getSizeInBits();
13242 if (OrigXWidth < VTWidth) {
13243 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
13244 AddToWorklist(X.getNode());
13245 } else if (OrigXWidth > VTWidth) {
13246 // To get the sign bit in the right place, we have to shift it right
13247 // before truncating.
13248 SDLoc DL(X);
13249 X = DAG.getNode(ISD::SRL, DL,
13250 X.getValueType(), X,
// NOTE(review): line 13251 (the shift-amount constant) is missing here.
13252 X.getValueType()));
13253 AddToWorklist(X.getNode());
13254 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
13255 AddToWorklist(X.getNode());
13256 }
13257
13258 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13259 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
13260 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13261 AddToWorklist(Cst.getNode());
13262 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
13263 AddToWorklist(X.getNode());
13264 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
13265 AddToWorklist(XorResult.getNode());
// NOTE(review): lines 13266-13268 (declaration of XorResult64 via an
// EXTRACT_ELEMENT of XorResult) are missing from this extract.
13269 SDLoc(XorResult)));
13270 AddToWorklist(XorResult64.getNode());
// NOTE(review): lines 13271-13272 (declaration of FlipBit as an AND of
// XorResult64 with the sign mask) are missing from this extract.
13273 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
13274 AddToWorklist(FlipBit.getNode());
// NOTE(review): lines 13275-13276 (the BUILD_PAIR constructing FlipBits)
// are missing from this extract.
13277 AddToWorklist(FlipBits.getNode());
13278 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
13279 }
// Scalar case: keep the sign bit of x and the magnitude bits of cst.
13280 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13281 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
13282 X, DAG.getConstant(SignBit, SDLoc(X), VT));
13283 AddToWorklist(X.getNode());
13284
13285 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13286 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
13287 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
13288 AddToWorklist(Cst.getNode());
13289
13290 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
13291 }
13292 }
13293
13294 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
13295 if (N0.getOpcode() == ISD::BUILD_PAIR)
// NOTE(review): line 13296 (presumably
// `if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))`)
// is missing from this extract.
13297 return CombineLD;
13298
13299 // Remove double bitcasts from shuffles - this is often a legacy of
13300 // XformToShuffleWithZero being used to combine bitmaskings (of
13301 // float vectors bitcast to integer vectors) into shuffles.
13302 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
13303 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
13304 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
// NOTE(review): lines 13305-13307 (remaining conditions and the declaration
// of SVN, presumably `cast<ShuffleVectorSDNode>(N0)`) are missing here.
13308
13309 // If operands are a bitcast, peek through if it casts the original VT.
13310 // If operands are a constant, just bitcast back to original VT.
13311 auto PeekThroughBitcast = [&](SDValue Op) {
13312 if (Op.getOpcode() == ISD::BITCAST &&
13313 Op.getOperand(0).getValueType() == VT)
13314 return SDValue(Op.getOperand(0));
13315 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
// NOTE(review): line 13316 (the ConstantFP build_vector alternative) is
// missing from this extract.
13317 return DAG.getBitcast(VT, Op);
13318 return SDValue();
13319 };
13320
13321 // FIXME: If either input vector is bitcast, try to convert the shuffle to
13322 // the result type of this bitcast. This would eliminate at least one
13323 // bitcast. See the transform in InstCombine.
// NOTE(review): lines 13324-13325, the declarations of SV0 and SV1 via
// PeekThroughBitcast of the shuffle operands, are missing from this extract.
13326 if (!(SV0 && SV1))
13327 return SDValue();
13328
// Widen/narrow the shuffle mask by the element-count ratio of the two VTs.
13329 int MaskScale =
// NOTE(review): lines 13330-13331 (the MaskScale computation and the
// declaration of NewMask) are missing from this extract.
13332 for (int M : SVN->getMask())
13333 for (int i = 0; i != MaskScale; ++i)
13334 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
13335
// NOTE(review): line 13336, the declaration of LegalShuffle, is missing.
13337 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
13338 if (LegalShuffle)
13339 return LegalShuffle;
13340 }
13341
13342 return SDValue();
13343}
13344
13345SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
13346 EVT VT = N->getValueType(0);
13347 return CombineConsecutiveLoads(N, VT);
13348}
13349
13350SDValue DAGCombiner::visitFREEZE(SDNode *N) {
13351 SDValue N0 = N->getOperand(0);
13352
13353 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
13354 return N0;
13355
13356 return SDValue();
13357}
13358
13359/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
13360/// operands. DstEltVT indicates the destination element value type.
/// Returns a BUILD_VECTOR of DstEltVT elements holding the same raw bits, or
/// an empty SDValue when the raw bits cannot be extracted.
// NOTE(review): this extract is missing original lines 13374, 13383,
// 13394-13395, 13403-13406, 13414, 13417-13418 and 13424 — declarations of
// Ops, VT, BVN, RawBits and UndefElements, plus the recursive re-invocations
// used for the FP<->int staging. Restore them from upstream LLVM before
// compiling.
13361SDValue DAGCombiner::
13362ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
13363 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
13364
13365 // If this is already the right type, we're done.
13366 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
13367
13368 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
13369 unsigned DstBitSize = DstEltVT.getSizeInBits();
13370
13371 // If this is a conversion of N elements of one type to N elements of another
13372 // type, convert each element. This handles FP<->INT cases.
13373 if (SrcBitSize == DstBitSize) {
// NOTE(review): line 13374, the declaration of Ops (a SmallVector<SDValue>),
// is missing from this extract.
13375 for (SDValue Op : BV->op_values()) {
13376 // If the vector element type is not legal, the BUILD_VECTOR operands
13377 // are promoted and implicitly truncated. Make that explicit here.
13378 if (Op.getValueType() != SrcEltVT)
13379 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
13380 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
13381 AddToWorklist(Ops.back().getNode());
13382 }
// NOTE(review): line 13383, the declaration of VT (the destination vector
// type), is missing from this extract.
13384 BV->getValueType(0).getVectorNumElements());
13385 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
13386 }
13387
13388 // Otherwise, we're growing or shrinking the elements. To avoid having to
13389 // handle annoying details of growing/shrinking FP values, we convert them to
13390 // int first.
13391 if (SrcEltVT.isFloatingPoint()) {
13392 // Convert the input float vector to a int vector where the elements are the
13393 // same sizes.
13394 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
// NOTE(review): line 13395, the recursive call rebinding BV to the
// int-typed build_vector, is missing from this extract.
13396 SrcEltVT = IntVT;
13397 }
13398
13399 // Now we know the input is an integer vector. If the output is a FP type,
13400 // convert to integer first, then to FP of the right size.
13401 if (DstEltVT.isFloatingPoint()) {
13402 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
// NOTE(review): lines 13403-13406 (the recursive conversion to TmpVT
// followed by the final same-size int->FP conversion) are missing here.
13404
13405 // Next, convert to FP elements of the same size.
13407 }
13408
13409 // Okay, we know the src/dst types are both integers of differing types.
13410 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
13411
13412 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
13413 // BuildVectorSDNode?
// NOTE(review): line 13414, the declaration of BVN (presumably
// `cast<BuildVectorSDNode>(BV)`), is missing from this extract.
13415
13416 // Extract the constant raw bit data.
// NOTE(review): lines 13417-13418, the declarations of RawBits and
// UndefElements, are missing from this extract.
13419 bool IsLE = DAG.getDataLayout().isLittleEndian();
13420 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
13421 return SDValue();
13422
13423 SDLoc DL(BV);
// NOTE(review): line 13424, the declaration of Ops for the result elements,
// is missing from this extract.
13425 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
13426 if (UndefElements[I])
13427 Ops.push_back(DAG.getUNDEF(DstEltVT));
13428 else
13429 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
13430 }
13431
13432 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
13433 return DAG.getBuildVector(VT, DL, Ops);
13434}
13435
13436// Returns true if floating point contraction is allowed on the FMUL-SDValue
13437// `N`
// NOTE(review): the function's signature (original line 13438, presumably
// `static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {`)
// is missing from this extract; the body below is cut mid-definition.
13439 assert(N.getOpcode() == ISD::FMUL);
13440
// Contraction is permitted either globally (-ffp-contract=fast or
// unsafe-fp-math) or per-node via the `contract` fast-math flag.
13441 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
13442 N->getFlags().hasAllowContract();
13443}
13444
13445// Returns true if `N` can assume no infinities involved in its computation.
// NOTE(review): the function's signature (original line 13446) is missing
// from this extract; only the body and closing brace are visible.
// True when no-infs FP math is enabled globally or the node carries the
// `ninf` fast-math flag.
13447 return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
13448}
13449
13450/// Try to perform FMA combining on a given FADD node.
/// Folds fadd-of-fmul patterns (optionally through FP_EXTEND and existing
/// FMA/FMAD nodes) into fused multiply-add nodes when the target and the
/// FP-contract/fast-math settings permit. Returns the fused node or an
/// empty SDValue.
// NOTE(review): this extract is missing original lines 13464, 13483-13484,
// 13493, 13500, 13548, 13562, 13575, 13588, 13602, 13616, 13632 and 13651 —
// among them the declarations of PreferredFusedOpcode and Aggressive, the
// isContractableFMUL lambda header, and the recurring
// TLI.isFPExtFoldable(...) condition lines. Restore them from upstream LLVM
// before compiling.
13451SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13452 SDValue N0 = N->getOperand(0);
13453 SDValue N1 = N->getOperand(1);
13454 EVT VT = N->getValueType(0);
13455 SDLoc SL(N);
13456
13457 const TargetOptions &Options = DAG.getTarget().Options;
13458
13459 // Floating-point multiply-add with intermediate rounding.
13460 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13461
13462 // Floating-point multiply-add without intermediate rounding.
13463 bool HasFMA =
// NOTE(review): line 13464 (presumably the
// TLI.isFMAFasterThanFMulAndFAdd(...) condition) is missing here.
13465 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13466
13467 // No valid opcode, do not combine.
13468 if (!HasFMAD && !HasFMA)
13469 return SDValue();
13470
13471 bool CanReassociate =
13472 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13473 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13474 Options.UnsafeFPMath || HasFMAD);
13475 // If the addition is not contractable, do not combine.
13476 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13477 return SDValue();
13478
// Some targets form FMAs later, in the machine combiner; stay out of the way.
13479 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13480 return SDValue();
13481
13482 // Always prefer FMAD to FMA for precision.
// NOTE(review): lines 13483-13484, the declarations of PreferredFusedOpcode
// (FMAD vs FMA) and Aggressive (target's aggressive-fusion setting), are
// missing from this extract.
13485
13486 auto isFusedOp = [&](SDValue N) {
13487 unsigned Opcode = N.getOpcode();
13488 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13489 };
13490
13491 // Is the node an FMUL and contractable either due to global flags or
13492 // SDNodeFlags.
// NOTE(review): line 13493, the isContractableFMUL lambda header, is
// missing from this extract.
13494 if (N.getOpcode() != ISD::FMUL)
13495 return false;
13496 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13497 };
13498 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13499 // prefer to fold the multiply with fewer uses.
// NOTE(review): line 13500, the guard condition for this swap (presumably
// both operands being contractable FMULs), is missing from this extract.
13501 if (N0.getNode()->use_size() > N1.getNode()->use_size())
13502 std::swap(N0, N1);
13503 }
13504
13505 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13506 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13507 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13508 N0.getOperand(1), N1);
13509 }
13510
13511 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13512 // Note: Commutes FADD operands.
13513 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13514 return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13515 N1.getOperand(1), N0);
13516 }
13517
13518 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13519 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13520 // This requires reassociation because it changes the order of operations.
13521 SDValue FMA, E;
13522 if (CanReassociate && isFusedOp(N0) &&
13523 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13524 N0.getOperand(2).hasOneUse()) {
13525 FMA = N0;
13526 E = N1;
13527 } else if (CanReassociate && isFusedOp(N1) &&
13528 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13529 N1.getOperand(2).hasOneUse()) {
13530 FMA = N1;
13531 E = N0;
13532 }
13533 if (FMA && E) {
13534 SDValue A = FMA.getOperand(0);
13535 SDValue B = FMA.getOperand(1);
13536 SDValue C = FMA.getOperand(2).getOperand(0);
13537 SDValue D = FMA.getOperand(2).getOperand(1);
13538 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13539 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13540 }
13541
13542 // Look through FP_EXTEND nodes to do more combining.
13543
13544 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13545 if (N0.getOpcode() == ISD::FP_EXTEND) {
13546 SDValue N00 = N0.getOperand(0);
13547 if (isContractableFMUL(N00) &&
// NOTE(review): line 13548, presumably the TLI.isFPExtFoldable(...) check,
// is missing from this extract.
13549 N00.getValueType())) {
13550 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13551 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13552 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13553 N1);
13554 }
13555 }
13556
13557 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13558 // Note: Commutes FADD operands.
13559 if (N1.getOpcode() == ISD::FP_EXTEND) {
13560 SDValue N10 = N1.getOperand(0);
13561 if (isContractableFMUL(N10) &&
// NOTE(review): line 13562, presumably the TLI.isFPExtFoldable(...) check,
// is missing from this extract.
13563 N10.getValueType())) {
13564 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13565 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13566 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13567 N0);
13568 }
13569 }
13570
13571 // More folding opportunities when target permits.
13572 if (Aggressive) {
13573 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13574 // -> (fma x, y, (fma (fpext u), (fpext v), z))
// NOTE(review): line 13575, the FoldFAddFMAFPExtFMul lambda header, is
// missing from this extract.
13576 SDValue Z) {
13577 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13578 DAG.getNode(PreferredFusedOpcode, SL, VT,
13579 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13580 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13581 Z));
13582 };
13583 if (isFusedOp(N0)) {
13584 SDValue N02 = N0.getOperand(2);
13585 if (N02.getOpcode() == ISD::FP_EXTEND) {
13586 SDValue N020 = N02.getOperand(0);
13587 if (isContractableFMUL(N020) &&
// NOTE(review): line 13588, presumably the TLI.isFPExtFoldable(...) check,
// is missing from this extract.
13589 N020.getValueType())) {
13590 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13591 N020.getOperand(0), N020.getOperand(1),
13592 N1);
13593 }
13594 }
13595 }
13596
13597 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13598 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13599 // FIXME: This turns two single-precision and one double-precision
13600 // operation into two double-precision operations, which might not be
13601 // interesting for all targets, especially GPUs.
// NOTE(review): line 13602, the FoldFAddFPExtFMAFMul lambda header, is
// missing from this extract.
13603 SDValue Z) {
13604 return DAG.getNode(
13605 PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13606 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13607 DAG.getNode(PreferredFusedOpcode, SL, VT,
13608 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13609 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13610 };
13611 if (N0.getOpcode() == ISD::FP_EXTEND) {
13612 SDValue N00 = N0.getOperand(0);
13613 if (isFusedOp(N00)) {
13614 SDValue N002 = N00.getOperand(2);
13615 if (isContractableFMUL(N002) &&
// NOTE(review): line 13616, presumably the TLI.isFPExtFoldable(...) check,
// is missing from this extract.
13617 N00.getValueType())) {
13618 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13619 N002.getOperand(0), N002.getOperand(1),
13620 N1);
13621 }
13622 }
13623 }
13624
13625 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
13626 // -> (fma y, z, (fma (fpext u), (fpext v), x))
13627 if (isFusedOp(N1)) {
13628 SDValue N12 = N1.getOperand(2);
13629 if (N12.getOpcode() == ISD::FP_EXTEND) {
13630 SDValue N120 = N12.getOperand(0);
13631 if (isContractableFMUL(N120) &&
// NOTE(review): line 13632, presumably the TLI.isFPExtFoldable(...) check,
// is missing from this extract.
13633 N120.getValueType())) {
13634 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13635 N120.getOperand(0), N120.getOperand(1),
13636 N0);
13637 }
13638 }
13639 }
13640
13641 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
13642 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13643 // FIXME: This turns two single-precision and one double-precision
13644 // operation into two double-precision operations, which might not be
13645 // interesting for all targets, especially GPUs.
13646 if (N1.getOpcode() == ISD::FP_EXTEND) {
13647 SDValue N10 = N1.getOperand(0);
13648 if (isFusedOp(N10)) {
13649 SDValue N102 = N10.getOperand(2);
13650 if (isContractableFMUL(N102) &&
// NOTE(review): line 13651, presumably the TLI.isFPExtFoldable(...) check,
// is missing from this extract.
13652 N10.getValueType())) {
13653 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13654 N102.getOperand(0), N102.getOperand(1),
13655 N0);
13656 }
13657 }
13658 }
13659 }
13660
13661 return SDValue();
13662}
13663
13664/// Try to perform FMA combining on a given FSUB node.
/// Folds fsub-of-fmul patterns (optionally through FNEG, FP_EXTEND and
/// existing FMA/FMAD nodes) into fused multiply-add nodes, negating operands
/// as needed to preserve the subtraction. Returns the fused node or an empty
/// SDValue.
// NOTE(review): this extract is missing original lines 13677, 13696-13697,
// 13702, 13730, 13764, 13779, 13800, 13823, 13839-13840, 13855, 13885-13886,
// 13909-13910, 13930-13931 and 13956-13957 — among them the declarations of
// PreferredFusedOpcode and Aggressive, two lambda headers, and the recurring
// TLI.isFPExtFoldable(...) condition lines. Restore them from upstream LLVM
// before compiling.
13665SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13666 SDValue N0 = N->getOperand(0);
13667 SDValue N1 = N->getOperand(1);
13668 EVT VT = N->getValueType(0);
13669 SDLoc SL(N);
13670
13671 const TargetOptions &Options = DAG.getTarget().Options;
13672 // Floating-point multiply-add with intermediate rounding.
13673 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13674
13675 // Floating-point multiply-add without intermediate rounding.
13676 bool HasFMA =
// NOTE(review): line 13677 (presumably the
// TLI.isFMAFasterThanFMulAndFAdd(...) condition) is missing here.
13678 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13679
13680 // No valid opcode, do not combine.
13681 if (!HasFMAD && !HasFMA)
13682 return SDValue();
13683
13684 const SDNodeFlags Flags = N->getFlags();
13685 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13686 Options.UnsafeFPMath || HasFMAD);
13687
13688 // If the subtraction is not contractable, do not combine.
13689 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13690 return SDValue();
13691
// Some targets form FMAs later, in the machine combiner; stay out of the way.
13692 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13693 return SDValue();
13694
13695 // Always prefer FMAD to FMA for precision.
// NOTE(review): lines 13696-13697, the declarations of PreferredFusedOpcode
// (FMAD vs FMA) and Aggressive (target's aggressive-fusion setting), are
// missing from this extract.
13698 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13699
13700 // Is the node an FMUL and contractable either due to global flags or
13701 // SDNodeFlags.
// NOTE(review): line 13702, the isContractableFMUL lambda header, is
// missing from this extract.
13703 if (N.getOpcode() != ISD::FMUL)
13704 return false;
13705 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13706 };
13707
13708 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13709 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13710 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13711 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13712 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13713 }
13714 return SDValue();
13715 };
13716
13717 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13718 // Note: Commutes FSUB operands.
13719 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13720 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13721 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13722 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13723 YZ.getOperand(1), X);
13724 }
13725 return SDValue();
13726 };
13727
13728 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13729 // prefer to fold the multiply with fewer uses.
// NOTE(review): line 13730, the first half of this condition (presumably
// requiring both operands to be contractable FMULs), is missing here.
13731 (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13732 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13733 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13734 return V;
13735 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13736 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13737 return V;
13738 } else {
13739 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13740 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13741 return V;
13742 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13743 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13744 return V;
13745 }
13746
13747 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
13748 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13749 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13750 SDValue N00 = N0.getOperand(0).getOperand(0);
13751 SDValue N01 = N0.getOperand(0).getOperand(1);
13752 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13753 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13754 DAG.getNode(ISD::FNEG, SL, VT, N1));
13755 }
13756
13757 // Look through FP_EXTEND nodes to do more combining.
13758
13759 // fold (fsub (fpext (fmul x, y)), z)
13760 // -> (fma (fpext x), (fpext y), (fneg z))
13761 if (N0.getOpcode() == ISD::FP_EXTEND) {
13762 SDValue N00 = N0.getOperand(0);
13763 if (isContractableFMUL(N00) &&
// NOTE(review): line 13764, presumably the TLI.isFPExtFoldable(...) check,
// is missing from this extract.
13765 N00.getValueType())) {
13766 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13767 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13768 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13769 DAG.getNode(ISD::FNEG, SL, VT, N1));
13770 }
13771 }
13772
13773 // fold (fsub x, (fpext (fmul y, z)))
13774 // -> (fma (fneg (fpext y)), (fpext z), x)
13775 // Note: Commutes FSUB operands.
13776 if (N1.getOpcode() == ISD::FP_EXTEND) {
13777 SDValue N10 = N1.getOperand(0);
13778 if (isContractableFMUL(N10) &&
// NOTE(review): line 13779, presumably the TLI.isFPExtFoldable(...) check,
// is missing from this extract.
13780 N10.getValueType())) {
13781 return DAG.getNode(
13782 PreferredFusedOpcode, SL, VT,
13783 DAG.getNode(ISD::FNEG, SL, VT,
13784 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13785 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13786 }
13787 }
13788
13789 // fold (fsub (fpext (fneg (fmul, x, y))), z)
13790 // -> (fneg (fma (fpext x), (fpext y), z))
13791 // Note: This could be removed with appropriate canonicalization of the
13792 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
13793 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13794 // from implementing the canonicalization in visitFSUB.
13795 if (N0.getOpcode() == ISD::FP_EXTEND) {
13796 SDValue N00 = N0.getOperand(0);
13797 if (N00.getOpcode() == ISD::FNEG) {
13798 SDValue N000 = N00.getOperand(0);
13799 if (isContractableFMUL(N000) &&
// NOTE(review): line 13800, presumably the TLI.isFPExtFoldable(...) check,
// is missing from this extract.
13801 N00.getValueType())) {
13802 return DAG.getNode(
13803 ISD::FNEG, SL, VT,
13804 DAG.getNode(PreferredFusedOpcode, SL, VT,
13805 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13806 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13807 N1));
13808 }
13809 }
13810 }
13811
13812 // fold (fsub (fneg (fpext (fmul, x, y))), z)
13813 // -> (fneg (fma (fpext x)), (fpext y), z)
13814 // Note: This could be removed with appropriate canonicalization of the
13815 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
13816 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13817 // from implementing the canonicalization in visitFSUB.
13818 if (N0.getOpcode() == ISD::FNEG) {
13819 SDValue N00 = N0.getOperand(0);
13820 if (N00.getOpcode() == ISD::FP_EXTEND) {
13821 SDValue N000 = N00.getOperand(0);
13822 if (isContractableFMUL(N000) &&
// NOTE(review): line 13823, presumably the TLI.isFPExtFoldable(...) check,
// is missing from this extract.
13824 N000.getValueType())) {
13825 return DAG.getNode(
13826 ISD::FNEG, SL, VT,
13827 DAG.getNode(PreferredFusedOpcode, SL, VT,
13828 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13829 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13830 N1));
13831 }
13832 }
13833 }
13834
13835 auto isReassociable = [Options](SDNode *N) {
13836 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13837 };
13838
// NOTE(review): lines 13839-13840, the isContractableAndReassociableFMUL
// lambda header, are missing from this extract.
13841 return isContractableFMUL(N) && isReassociable(N.getNode());
13842 };
13843
13844 auto isFusedOp = [&](SDValue N) {
13845 unsigned Opcode = N.getOpcode();
13846 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13847 };
13848
13849 // More folding opportunities when target permits.
13850 if (Aggressive && isReassociable(N)) {
13851 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
13852 // fold (fsub (fma x, y, (fmul u, v)), z)
13853 // -> (fma x, y (fma u, v, (fneg z)))
13854 if (CanFuse && isFusedOp(N0) &&
// NOTE(review): line 13855, presumably the
// isContractableAndReassociableFMUL(N0.getOperand(2)) check, is missing.
13856 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
13857 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13858 N0.getOperand(1),
13859 DAG.getNode(PreferredFusedOpcode, SL, VT,
13860 N0.getOperand(2).getOperand(0),
13861 N0.getOperand(2).getOperand(1),
13862 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13863 }
13864
13865 // fold (fsub x, (fma y, z, (fmul u, v)))
13866 // -> (fma (fneg y), z, (fma (fneg u), v, x))
13867 if (CanFuse && isFusedOp(N1) &&
13868 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
13869 N1->hasOneUse() && NoSignedZero) {
13870 SDValue N20 = N1.getOperand(2).getOperand(0);
13871 SDValue N21 = N1.getOperand(2).getOperand(1);
13872 return DAG.getNode(
13873 PreferredFusedOpcode, SL, VT,
13874 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13875 DAG.getNode(PreferredFusedOpcode, SL, VT,
13876 DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13877 }
13878
13879 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13880 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
13881 if (isFusedOp(N0) && N0->hasOneUse()) {
13882 SDValue N02 = N0.getOperand(2);
13883 if (N02.getOpcode() == ISD::FP_EXTEND) {
13884 SDValue N020 = N02.getOperand(0);
// NOTE(review): lines 13885-13886, presumably the
// isContractableAndReassociableFMUL(N020) and TLI.isFPExtFoldable(...)
// checks, are missing from this extract.
13887 N020.getValueType())) {
13888 return DAG.getNode(
13889 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13890 DAG.getNode(
13891 PreferredFusedOpcode, SL, VT,
13892 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13893 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13894 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13895 }
13896 }
13897 }
13898
13899 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13900 // -> (fma (fpext x), (fpext y),
13901 // (fma (fpext u), (fpext v), (fneg z)))
13902 // FIXME: This turns two single-precision and one double-precision
13903 // operation into two double-precision operations, which might not be
13904 // interesting for all targets, especially GPUs.
13905 if (N0.getOpcode() == ISD::FP_EXTEND) {
13906 SDValue N00 = N0.getOperand(0);
13907 if (isFusedOp(N00)) {
13908 SDValue N002 = N00.getOperand(2);
// NOTE(review): lines 13909-13910, presumably the
// isContractableAndReassociableFMUL(N002) and TLI.isFPExtFoldable(...)
// checks, are missing from this extract.
13911 N00.getValueType())) {
13912 return DAG.getNode(
13913 PreferredFusedOpcode, SL, VT,
13914 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13915 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13916 DAG.getNode(
13917 PreferredFusedOpcode, SL, VT,
13918 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13919 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13920 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13921 }
13922 }
13923 }
13924
13925 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13926 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13927 if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13928 N1->hasOneUse()) {
13929 SDValue N120 = N1.getOperand(2).getOperand(0);
// NOTE(review): lines 13930-13931, presumably the
// isContractableAndReassociableFMUL(N120) and TLI.isFPExtFoldable(...)
// checks, are missing from this extract.
13932 N120.getValueType())) {
13933 SDValue N1200 = N120.getOperand(0);
13934 SDValue N1201 = N120.getOperand(1);
13935 return DAG.getNode(
13936 PreferredFusedOpcode, SL, VT,
13937 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13938 DAG.getNode(PreferredFusedOpcode, SL, VT,
13939 DAG.getNode(ISD::FNEG, SL, VT,
13940 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13941 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13942 }
13943 }
13944
13945 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13946 // -> (fma (fneg (fpext y)), (fpext z),
13947 // (fma (fneg (fpext u)), (fpext v), x))
13948 // FIXME: This turns two single-precision and one double-precision
13949 // operation into two double-precision operations, which might not be
13950 // interesting for all targets, especially GPUs.
13951 if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
13952 SDValue CvtSrc = N1.getOperand(0);
13953 SDValue N100 = CvtSrc.getOperand(0);
13954 SDValue N101 = CvtSrc.getOperand(1);
13955 SDValue N102 = CvtSrc.getOperand(2);
// NOTE(review): lines 13956-13957, presumably the
// isContractableAndReassociableFMUL(N102) and TLI.isFPExtFoldable(...)
// checks, are missing from this extract.
13958 CvtSrc.getValueType())) {
13959 SDValue N1020 = N102.getOperand(0);
13960 SDValue N1021 = N102.getOperand(1);
13961 return DAG.getNode(
13962 PreferredFusedOpcode, SL, VT,
13963 DAG.getNode(ISD::FNEG, SL, VT,
13964 DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13965 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13966 DAG.getNode(PreferredFusedOpcode, SL, VT,
13967 DAG.getNode(ISD::FNEG, SL, VT,
13968 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13969 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13970 }
13971 }
13972 }
13973
13974 return SDValue();
13975}
13976
13977/// Try to perform FMA combining on a given FMUL node based on the distributive
13978/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13979/// subtraction instead of addition).
13980SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13981  SDValue N0 = N->getOperand(0);
13982  SDValue N1 = N->getOperand(1);
13983  EVT VT = N->getValueType(0);
13984  SDLoc SL(N);
13985
13986  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13987
13988  const TargetOptions &Options = DAG.getTarget().Options;
13989
13990  // The transforms below are incorrect when x == 0 and y == inf, because the
13991  // intermediate multiplication produces a nan.
  // Only the no-infs property of the add/sub operand matters; fall back to N1
  // when N0 is not the FADD.
13992  SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
13993  if (!hasNoInfs(Options, FAdd))
13994    return SDValue();
13995
13996  // Floating-point multiply-add without intermediate rounding.
13997  bool HasFMA =
14000      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14001
14002  // Floating-point multiply-add with intermediate rounding. This can result
14003  // in a less precise result due to the changed rounding order.
14004  bool HasFMAD = Options.UnsafeFPMath &&
14005                 (LegalOperations && TLI.isFMADLegal(DAG, N));
14006
14007  // No valid opcode, do not combine.
14008  if (!HasFMAD && !HasFMA)
14009    return SDValue();
14010
14011  // Always prefer FMAD to FMA for precision.
14014
14015  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
14016  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  // Helper for the two folds above. Requires the FADD to be single-use unless
  // aggressive fusion is enabled; the +/-1.0 operand may be a splat constant.
14017  auto FuseFADD = [&](SDValue X, SDValue Y) {
14018    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
14019      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
14020        if (C->isExactlyValue(+1.0))
14021          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14022                             Y);
14023        if (C->isExactlyValue(-1.0))
14024          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14025                             DAG.getNode(ISD::FNEG, SL, VT, Y));
14026      }
14027    }
14028    return SDValue();
14029  };
14030
  // Try both operand orders since FMUL is commutative.
14031  if (SDValue FMA = FuseFADD(N0, N1))
14032    return FMA;
14033  if (SDValue FMA = FuseFADD(N1, N0))
14034    return FMA;
14035
14036  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
14037  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
14038  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
14039  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
  // Helper for the four folds above: a +/-1.0 constant may sit on either side
  // of the FSUB, giving four variants.
14040  auto FuseFSUB = [&](SDValue X, SDValue Y) {
14041    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
14042      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
14043        if (C0->isExactlyValue(+1.0))
14044          return DAG.getNode(PreferredFusedOpcode, SL, VT,
14045                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14046                             Y);
14047        if (C0->isExactlyValue(-1.0))
14048          return DAG.getNode(PreferredFusedOpcode, SL, VT,
14049                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14050                             DAG.getNode(ISD::FNEG, SL, VT, Y));
14051      }
14052      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
14053        if (C1->isExactlyValue(+1.0))
14054          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14055                             DAG.getNode(ISD::FNEG, SL, VT, Y));
14056        if (C1->isExactlyValue(-1.0))
14057          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14058                             Y);
14059      }
14060    }
14061    return SDValue();
14062  };
14063
14064  if (SDValue FMA = FuseFSUB(N0, N1))
14065    return FMA;
14066  if (SDValue FMA = FuseFSUB(N1, N0))
14067    return FMA;
14068
14069  return SDValue();
14070}
14071
// Combine an ISD::FADD node. Applies, in order: generic FP binop
// simplification, constant folding, constant canonicalization to RHS,
// add-of-negation -> FSUB, (fmul B, -2.0) tricks, nnan folds to 0.0,
// reassociation-based constant/multiplication folds, and FADD->FMA fusion.
// Returns the replacement value, or an empty SDValue if nothing applied.
14072SDValue DAGCombiner::visitFADD(SDNode *N) {
14073  SDValue N0 = N->getOperand(0);
14074  SDValue N1 = N->getOperand(1);
14077  EVT VT = N->getValueType(0);
14078  SDLoc DL(N);
14079  const TargetOptions &Options = DAG.getTarget().Options;
14080  SDNodeFlags Flags = N->getFlags();
14082
14083  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14084    return R;
14085
14086  // fold (fadd c1, c2) -> c1 + c2
14087  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
14088    return C;
14089
14090  // canonicalize constant to RHS
14091  if (N0CFP && !N1CFP)
14092    return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
14093
14094  // fold vector ops
14095  if (VT.isVector())
14097      return FoldedVOp;
14098
14099  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
14101  if (N1C && N1C->isZero())
14102    if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
14103      return N0;
14104
14106    return NewSel;
14107
14108  // fold (fadd A, (fneg B)) -> (fsub A, B)
14109  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14111            N1, DAG, LegalOperations, ForCodeSize))
14112      return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
14113
14114  // fold (fadd (fneg A), B) -> (fsub B, A)
14115  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14117            N0, DAG, LegalOperations, ForCodeSize))
14118      return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
14119
  // Matches a single-use (fmul B, -2.0) (the -2.0 may be a splat constant) so
  // the surrounding add can be rewritten as a subtract of (B + B).
14120  auto isFMulNegTwo = [](SDValue FMul) {
14121    if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
14122      return false;
14123    auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
14124    return C && C->isExactlyValue(-2.0);
14125  };
14126
14127  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
14128  if (isFMulNegTwo(N0)) {
14129    SDValue B = N0.getOperand(0);
14130    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14131    return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
14132  }
14133  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
14134  if (isFMulNegTwo(N1)) {
14135    SDValue B = N1.getOperand(0);
14136    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14137    return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
14138  }
14139
14140  // No FP constant should be created after legalization as Instruction
14141  // Selection pass has a hard time dealing with FP constants.
14142  bool AllowNewConst = (Level < AfterLegalizeDAG);
14143
14144  // If nnan is enabled, fold lots of things.
14145  if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
14146    // If allowed, fold (fadd (fneg x), x) -> 0.0
14147    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
14148      return DAG.getConstantFP(0.0, DL, VT);
14149
14150    // If allowed, fold (fadd x, (fneg x)) -> 0.0
14151    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
14152      return DAG.getConstantFP(0.0, DL, VT);
14153  }
14154
14155  // If 'unsafe math' or reassoc and nsz, fold lots of things.
14156  // TODO: break out portions of the transformations below for which Unsafe is
14157  //       considered and which do not require both nsz and reassoc
14158  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14159       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14160      AllowNewConst) {
14161    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
14162    if (N1CFP && N0.getOpcode() == ISD::FADD &&
14164      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
14165      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
14166    }
14167
14168    // We can fold chains of FADD's of the same value into multiplications.
14169    // This transform is not safe in general because we are reducing the number
14170    // of rounding steps.
14171    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
14172      if (N0.getOpcode() == ISD::FMUL) {
14175
14176        // (fadd (fmul x, c), x) -> (fmul x, c+1)
14177        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
14178          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14179                                       DAG.getConstantFP(1.0, DL, VT));
14180          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
14181        }
14182
14183        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
14184        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
14185            N1.getOperand(0) == N1.getOperand(1) &&
14186            N0.getOperand(0) == N1.getOperand(0)) {
14187          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14188                                       DAG.getConstantFP(2.0, DL, VT));
14189          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
14190        }
14191      }
14192
14193      if (N1.getOpcode() == ISD::FMUL) {
14194        bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14195        bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
14196
14197        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
14198        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
14199          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14200                                       DAG.getConstantFP(1.0, DL, VT));
14201          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
14202        }
14203
14204        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
14205        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
14206            N0.getOperand(0) == N0.getOperand(1) &&
14207            N1.getOperand(0) == N0.getOperand(0)) {
14208          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14209                                       DAG.getConstantFP(2.0, DL, VT));
14210          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
14211        }
14212      }
14213
14214      if (N0.getOpcode() == ISD::FADD) {
14216        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
14217        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
14218            (N0.getOperand(0) == N1)) {
14219          return DAG.getNode(ISD::FMUL, DL, VT, N1,
14220                             DAG.getConstantFP(3.0, DL, VT));
14221        }
14222      }
14223
14224      if (N1.getOpcode() == ISD::FADD) {
14225        bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14226        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
14227        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
14228            N1.getOperand(0) == N0) {
14229          return DAG.getNode(ISD::FMUL, DL, VT, N0,
14230                             DAG.getConstantFP(3.0, DL, VT));
14231        }
14232      }
14233
14234      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
14235      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
14236          N0.getOperand(0) == N0.getOperand(1) &&
14237          N1.getOperand(0) == N1.getOperand(1) &&
14238          N0.getOperand(0) == N1.getOperand(0)) {
14239        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
14240                           DAG.getConstantFP(4.0, DL, VT));
14241      }
14242    }
14243  } // enable-unsafe-fp-math
14244
14245  // FADD -> FMA combines:
14247    AddToWorklist(Fused.getNode());
14248    return Fused;
14249  }
14250  return SDValue();
14251}
14252
// Combine a STRICT_FADD node (FP add with an explicit chain operand and a
// chain result). Rewrites an add of a negated operand into STRICT_FSUB,
// threading the incoming chain through to the new node so the strict FP
// ordering is preserved. Returns empty SDValue when no combine applies.
14253SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
14254  SDValue Chain = N->getOperand(0);
14255  SDValue N0 = N->getOperand(1);
14256  SDValue N1 = N->getOperand(2);
14257  EVT VT = N->getValueType(0);
14258  EVT ChainVT = N->getValueType(1);
14259  SDLoc DL(N);
14262
14263  // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
14264  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14265            N1, DAG, LegalOperations, ForCodeSize)) {
      // The new node produces both the value (VT) and the chain (ChainVT).
14266      return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14267                         {Chain, N0, NegN1});
14268    }
14269
14270  // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
14271  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14273            N0, DAG, LegalOperations, ForCodeSize)) {
14274      return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14275                         {Chain, N1, NegN0});
14276    }
14277  return SDValue();
14278}
14279
// Combine an ISD::FSUB node: constant folding, identity folds (A - 0, x - x,
// -0.0 - X), reassociation folds for X - (X + Y), subtract-of-negation ->
// FADD, and finally FSUB->FMA fusion. Returns empty SDValue if no combine
// applied.
14280SDValue DAGCombiner::visitFSUB(SDNode *N) {
14281  SDValue N0 = N->getOperand(0);
14282  SDValue N1 = N->getOperand(1);
14285  EVT VT = N->getValueType(0);
14286  SDLoc DL(N);
14287  const TargetOptions &Options = DAG.getTarget().Options;
14288  const SDNodeFlags Flags = N->getFlags();
14290
14291  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14292    return R;
14293
14294  // fold (fsub c1, c2) -> c1-c2
14295  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
14296    return C;
14297
14298  // fold vector ops
14299  if (VT.isVector())
14301      return FoldedVOp;
14302
14304    return NewSel;
14305
14306  // (fsub A, 0) -> A
  // Subtracting +0.0 is always an identity; subtracting -0.0 additionally
  // needs the no-signed-zeros guarantee.
14307  if (N1CFP && N1CFP->isZero()) {
14308    if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
14309        Flags.hasNoSignedZeros()) {
14310      return N0;
14311    }
14312  }
14313
14314  if (N0 == N1) {
14315    // (fsub x, x) -> 0.0
    // Only valid with nnan: NaN - NaN is NaN, not 0.0.
14316    if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
14317      return DAG.getConstantFP(0.0f, DL, VT);
14318  }
14319
14320  // (fsub -0.0, N1) -> -N1
14321  if (N0CFP && N0CFP->isZero()) {
14322    if (N0CFP->isNegative() ||
14323        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
14324      // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
14325      // flushed to zero, unless all users treat denorms as zero (DAZ).
14326      // FIXME: This transform will change the sign of a NaN and the behavior
14327      // of a signaling NaN. It is only valid when a NoNaN flag is present.
      // Prefer a target-specific cheap negation; fall back to a plain FNEG.
14330      if (SDValue NegN1 =
14331              TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14332        return NegN1;
14333      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14334        return DAG.getNode(ISD::FNEG, DL, VT, N1);
14335    }
14336  }
14337
14338  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14339       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14340      N1.getOpcode() == ISD::FADD) {
14341    // X - (X + Y) -> -Y
14342    if (N0 == N1->getOperand(0))
14343      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
14344    // X - (Y + X) -> -Y
14345    if (N0 == N1->getOperand(1))
14346      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
14347  }
14348
14349  // fold (fsub A, (fneg B)) -> (fadd A, B)
14350  if (SDValue NegN1 =
14351          TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14352    return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
14353
14355  // FSUB -> FMA combines:
14357    AddToWorklist(Fused.getNode());
14358    return Fused;
14359  }
14360
14361  return SDValue();
14362}
14363
// Combine an ISD::FMUL node: constant folding, canonicalization, constant
// reassociation, X*2.0 / X*-1.0 strength reductions, double-negation
// elimination, select-based fabs/fneg recognition, and FMUL->FMA fusion.
// Returns empty SDValue if no combine applied.
14364SDValue DAGCombiner::visitFMUL(SDNode *N) {
14365  SDValue N0 = N->getOperand(0);
14366  SDValue N1 = N->getOperand(1);
14368  EVT VT = N->getValueType(0);
14369  SDLoc DL(N);
14370  const TargetOptions &Options = DAG.getTarget().Options;
14371  const SDNodeFlags Flags = N->getFlags();
14373
14374  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14375    return R;
14376
14377  // fold (fmul c1, c2) -> c1*c2
14378  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
14379    return C;
14380
14381  // canonicalize constant to RHS
14384    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
14385
14386  // fold vector ops
14387  if (VT.isVector())
14389      return FoldedVOp;
14390
14392    return NewSel;
14393
14394  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
14395    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
14397        N0.getOpcode() == ISD::FMUL) {
14398      SDValue N00 = N0.getOperand(0);
14399      SDValue N01 = N0.getOperand(1);
14400      // Avoid an infinite loop by making sure that N00 is not a constant
14401      // (the inner multiply has not been constant folded yet).
14404        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
14405        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
14406      }
14407    }
14408
14409    // Match a special-case: we convert X * 2.0 into fadd.
14410    // fmul (fadd X, X), C -> fmul X, 2.0 * C
14411    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
14412        N0.getOperand(0) == N0.getOperand(1)) {
14413      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
14414      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
14415      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
14416    }
14417  }
14418
14419  // fold (fmul X, 2.0) -> (fadd X, X)
14420  if (N1CFP && N1CFP->isExactlyValue(+2.0))
14421    return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
14422
14423  // fold (fmul X, -1.0) -> (fsub -0.0, X)
14424  if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
14425    if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
14426      return DAG.getNode(ISD::FSUB, DL, VT,
14427                         DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
14428    }
14429  }
14430
14431  // -N0 * -N1 --> N0 * N1
  // Both negations must be removable for free (cost tracked by the elided
  // NegatibleCost checks) or the rewrite would add instructions.
14436  SDValue NegN0 =
14437      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14438  SDValue NegN1 =
14439      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14440  if (NegN0 && NegN1 &&
14443    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14444
14445  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14446  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
14447  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14448      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14449      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so Select is always the SELECT and X the other operand.
14450    SDValue Select = N0, X = N1;
14451    if (Select.getOpcode() != ISD::SELECT)
14452      std::swap(Select, X);
14453
14454    SDValue Cond = Select.getOperand(0);
14455    auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14456    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14457
    // Require a compare of X against 0.0 with +/-1.0 select arms.
14458    if (TrueOpnd && FalseOpnd &&
14459        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14460        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14461        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14462      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14463      switch (CC) {
14464      default: break;
14465      case ISD::SETOLT:
14466      case ISD::SETULT:
14467      case ISD::SETOLE:
14468      case ISD::SETULE:
14469      case ISD::SETLT:
14470      case ISD::SETLE:
      // NOTE(review): the less-than cases appear to swap TrueOpnd/FalseOpnd
      // and fall through to the greater-than handling (lines elided in this
      // excerpt) -- confirm against the full source.
14473      case ISD::SETOGT:
14474      case ISD::SETUGT:
14475      case ISD::SETOGE:
14476      case ISD::SETUGE:
14477      case ISD::SETGT:
14478      case ISD::SETGE:
14479        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14480            TLI.isOperationLegal(ISD::FNEG, VT))
14481          return DAG.getNode(ISD::FNEG, DL, VT,
14482                             DAG.getNode(ISD::FABS, DL, VT, X));
14483        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14484          return DAG.getNode(ISD::FABS, DL, VT, X);
14485
14486        break;
14487      }
14488    }
14489  }
14490
14491  // FMUL -> FMA combines:
14493    AddToWorklist(Fused.getNode());
14494    return Fused;
14495  }
14496
14497  return SDValue();
14498}
14499
// Combine an ISD::FMA node: constant folding, double-negation removal on the
// multiplicands, identity folds for 0.0/1.0/-1.0 operands, canonicalization
// of the constant multiplicand to operand 1, unsafe-math reassociations, and
// a final whole-node negation fold. Returns empty SDValue when nothing
// applied.
14500SDValue DAGCombiner::visitFMA(SDNode *N) {
14501  SDValue N0 = N->getOperand(0);
14502  SDValue N1 = N->getOperand(1);
14503  SDValue N2 = N->getOperand(2);
14506  EVT VT = N->getValueType(0);
14507  SDLoc DL(N);
14508  const TargetOptions &Options = DAG.getTarget().Options;
14509  // FMA nodes have flags that propagate to the created nodes.
14511
  // Reassociation is allowed either globally or via this node's own flag.
14512  bool UnsafeFPMath =
14513      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14514
14515  // Constant fold FMA.
14516  if (isa<ConstantFPSDNode>(N0) &&
14519    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14520  }
14521
14522  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14527  SDValue NegN0 =
14528      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14529  SDValue NegN1 =
14530      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14531  if (NegN0 && NegN1 &&
14534    return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14535
  // With reassociation, 0.0 * x + y folds to y (ignores NaN/Inf in x).
14536  if (UnsafeFPMath) {
14537    if (N0CFP && N0CFP->isZero())
14538      return N2;
14539    if (N1CFP && N1CFP->isZero())
14540      return N2;
14541  }
14542
  // fma(1.0, x, y) and fma(x, 1.0, y) are exactly x + y.
14543  if (N0CFP && N0CFP->isExactlyValue(1.0))
14544    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14545  if (N1CFP && N1CFP->isExactlyValue(1.0))
14546    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14547
14548  // Canonicalize (fma c, x, y) -> (fma x, c, y)
14551    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14552
14553  if (UnsafeFPMath) {
14554    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14555    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14558      return DAG.getNode(ISD::FMUL, DL, VT, N0,
14559                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14560    }
14561
14562    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14563    if (N0.getOpcode() == ISD::FMUL &&
14566      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14567                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14568                         N2);
14569    }
14570  }
14571
14572  // (fma x, -1, y) -> (fadd (fneg x), y)
14573  if (N1CFP) {
14574    if (N1CFP->isExactlyValue(1.0))
14575      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14576
14577    if (N1CFP->isExactlyValue(-1.0) &&
14578        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14579      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14580      AddToWorklist(RHSNeg.getNode());
14581      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14582    }
14583
14584    // fma (fneg x), K, y -> fma x -K, y
14585    if (N0.getOpcode() == ISD::FNEG &&
14587         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14588                                              ForCodeSize)))) {
14589      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14590                         DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14591    }
14592  }
14593
14594  if (UnsafeFPMath) {
14595    // (fma x, c, x) -> (fmul x, (c+1))
14596    if (N1CFP && N0 == N2) {
14597      return DAG.getNode(
14598          ISD::FMUL, DL, VT, N0,
14599          DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14600    }
14601
14602    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14603    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14604      return DAG.getNode(
14605          ISD::FMUL, DL, VT, N0,
14606          DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14607    }
14608  }
14609
14610  // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14611  // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
  // Only profitable when FNEG is not already free on this type.
14612  if (!TLI.isFNegFree(VT))
14614            SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14615      return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14616  return SDValue();
14617}
14618
14619// Combine multiple FDIVs with the same divisor into multiple FMULs by the
14620// reciprocal.
14621// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14622// Notice that this is not always beneficial. One reason is different targets
14623// may have different costs for FDIV and FMUL, so sometimes the cost of two
14624// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
14625// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
// On success every eligible FDIV user of the divisor is replaced via
// CombineTo and SDValue(N, 0) is returned to signal that N itself was
// replaced; otherwise an empty SDValue is returned.
14626SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14627  // TODO: Limit this transform based on optsize/minsize - it always creates at
14628  //       least 1 extra instruction. But the perf win may be substantial enough
14629  //       that only minsize should restrict this.
14631  const SDNodeFlags Flags = N->getFlags();
14632  if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14633    return SDValue();
14634
14635  // Skip if current node is a reciprocal/fneg-reciprocal.
14636  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14637  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14638  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14639    return SDValue();
14640
14641  // Exit early if the target does not want this transform or if there can't
14642  // possibly be enough uses of the divisor to make the transform worthwhile.
14643  unsigned MinUses = TLI.combineRepeatedFPDivisors();
14644
14645  // For splat vectors, scale the number of uses by the splat factor. If we can
14646  // convert the division into a scalar op, that will likely be much faster.
14647  unsigned NumElts = 1;
14648  EVT VT = N->getValueType(0);
14649  if (VT.isVector() && DAG.isSplatValue(N1))
14651
  // MinUses == 0 means the target opted out of this transform entirely.
14652  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14653    return SDValue();
14654
14655  // Find all FDIV users of the same divisor.
14656  // Use a set because duplicates may be present in the user list.
14658  for (auto *U : N1->uses()) {
14659    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14660      // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14661      if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14662          U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14663          U->getFlags().hasAllowReassociation() &&
14664          U->getFlags().hasNoSignedZeros())
14665        continue;
14666
14667      // This division is eligible for optimization only if global unsafe math
14668      // is enabled or if this division allows reciprocal formation.
14669      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14670        Users.insert(U);
14671    }
14672  }
14673
14674  // Now that we have the actual number of divisor uses, make sure it meets
14675  // the minimum threshold specified by the target.
14676  if ((Users.size() * NumElts) < MinUses)
14677    return SDValue();
14678
14679  SDLoc DL(N);
14680  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14681  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14682
14683  // Dividend / Divisor -> Dividend * Reciprocal
14684  for (auto *U : Users) {
14685    SDValue Dividend = U->getOperand(0);
14686    if (Dividend != FPOne) {
14687      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14688                                    Reciprocal, Flags);
14689      CombineTo(U, NewNode);
14690    } else if (U != Reciprocal.getNode()) {
14691      // In the absence of fast-math-flags, this user node is always the
14692      // same node as Reciprocal, but with FMF they may be different nodes.
14693      CombineTo(U, Reciprocal);
14694    }
14695  }
14696  return SDValue(N, 0); // N was replaced.
14697}
14698
// Combine an ISD::FDIV node: constant folding, division-by-constant ->
// multiply by reciprocal, reciprocal-sqrt estimate formation (including
// looking through FP_EXTEND/FP_ROUND/FMUL around the FSQRT), generic
// reciprocal estimates, X/sqrt(X) -> sqrt(X), and double-negation removal.
// Returns empty SDValue when no combine applied.
14699SDValue DAGCombiner::visitFDIV(SDNode *N) {
14700  SDValue N0 = N->getOperand(0);
14701  SDValue N1 = N->getOperand(1);
14702  EVT VT = N->getValueType(0);
14703  SDLoc DL(N);
14704  const TargetOptions &Options = DAG.getTarget().Options;
14705  SDNodeFlags Flags = N->getFlags();
14707
14708  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14709    return R;
14710
14711  // fold (fdiv c1, c2) -> c1/c2
14712  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
14713    return C;
14714
14715  // fold vector ops
14716  if (VT.isVector())
14718      return FoldedVOp;
14719
14721    return NewSel;
14722
14724    return V;
14725
14726  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14727    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
14728    if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
14729      // Compute the reciprocal 1.0 / c2.
14730      const APFloat &N1APF = N1CFP->getValueAPF();
14731      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14733      // Only do the transform if the reciprocal is a legal fp immediate that
14734      // isn't too nasty (eg NaN, denormal, ...).
14735      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14736          (!LegalOperations ||
14737           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14738           // backend)... we should handle this gracefully after Legalize.
14739           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14741           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14742        return DAG.getNode(ISD::FMUL, DL, VT, N0,
14743                           DAG.getConstantFP(Recip, DL, VT));
14744    }
14745
14746    // If this FDIV is part of a reciprocal square root, it may be folded
14747    // into a target-specific square root estimate instruction.
14748    if (N1.getOpcode() == ISD::FSQRT) {
14749      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14750        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14751    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14752               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Build the estimate in the narrower type, then extend the result.
14753      if (SDValue RV =
14754              buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14755        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14756        AddToWorklist(RV.getNode());
14757        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14758      }
14759    } else if (N1.getOpcode() == ISD::FP_ROUND &&
14760               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Build the estimate in the wider type, then round, preserving the
      // original FP_ROUND's truncation flag operand.
14761      if (SDValue RV =
14762              buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14763        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14764        AddToWorklist(RV.getNode());
14765        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14766      }
14767    } else if (N1.getOpcode() == ISD::FMUL) {
14768      // Look through an FMUL. Even though this won't remove the FDIV directly,
14769      // it's still worthwhile to get rid of the FSQRT if possible.
14770      SDValue Sqrt, Y;
14771      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14772        Sqrt = N1.getOperand(0);
14773        Y = N1.getOperand(1);
14774      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14775        Sqrt = N1.getOperand(1);
14776        Y = N1.getOperand(0);
14777      }
14778      if (Sqrt.getNode()) {
14779        // If the other multiply operand is known positive, pull it into the
14780        // sqrt. That will eliminate the division if we convert to an estimate.
14781        if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14782            N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14783          SDValue A;
14784          if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14785            A = Y.getOperand(0);
14786          else if (Y == Sqrt.getOperand(0))
14787            A = Y;
14788          if (A) {
14789            // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14790            // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14791            SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14792            SDValue AAZ =
14793                DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14794            if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14795              return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14796
14797            // Estimate creation failed. Clean up speculatively created nodes.
14798            recursivelyDeleteUnusedNodes(AAZ.getNode());
14799          }
14800        }
14801
14802        // We found a FSQRT, so try to make this fold:
14803        // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14804        if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14805          SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14806          AddToWorklist(Div.getNode());
14807          return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14808        }
14809      }
14810    }
14811
14812    // Fold into a reciprocal estimate and multiply instead of a real divide.
14813    if (Options.NoInfsFPMath || Flags.hasNoInfs())
14814      if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14815        return RV;
14816  }
14817
14818  // Fold X/Sqrt(X) -> Sqrt(X)
14819  if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14820      (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14821    if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14822      return N1;
14823
14824  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14829  SDValue NegN0 =
14830      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14831  SDValue NegN1 =
14832      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14833  if (NegN0 && NegN1 &&
14836    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14837
14838  return SDValue();
14839}
14840
// Combine an ISD::FREM node. Only generic simplifications apply here:
// FP binop simplification, constant folding (frem c1, c2 -> fmod(c1, c2)),
// and folding the binop into a select. Returns empty SDValue otherwise.
14841SDValue DAGCombiner::visitFREM(SDNode *N) {
14842  SDValue N0 = N->getOperand(0);
14843  SDValue N1 = N->getOperand(1);
14844  EVT VT = N->getValueType(0);
14845  SDNodeFlags Flags = N->getFlags();
14847
14848  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14849    return R;
14850
14851  // fold (frem c1, c2) -> fmod(c1,c2)
14852  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
14853    return C;
14854
14856    return NewSel;
14857
14858  return SDValue();
14859}
14860
14861SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14862 SDNodeFlags Flags = N->getFlags();
14863 const TargetOptions &Options = DAG.getTarget().Options;
14864
14865 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14866 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14867 if (!Flags.hasApproximateFuncs() ||
14868 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14869 return SDValue();
14870
14871 SDValue N0 = N->getOperand(0);
14872 if (TLI.isFsqrtCheap(N0, DAG))
14873 return SDValue();
14874
14875 // FSQRT nodes have flags that propagate to the created nodes.
14876 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14877 // transform the fdiv, we may produce a sub-optimal estimate sequence
14878 // because the reciprocal calculation may not have to filter out a
14879 // 0.0 input.
14880 return buildSqrtEstimate(N0, Flags);
14881}
14882
14883/// copysign(x, fp_extend(y)) -> copysign(x, y)
14884/// copysign(x, fp_round(y)) -> copysign(x, y)
/// Returns true when it is safe/profitable for visitFCOPYSIGN to strip an
/// FP_EXTEND/FP_ROUND cast off the sign operand (operand 1) of N.
14886  SDValue N1 = N->getOperand(1);
14887  if ((N1.getOpcode() == ISD::FP_EXTEND ||
14888       N1.getOpcode() == ISD::FP_ROUND)) {
14889    EVT N1VT = N1->getValueType(0);
14890    EVT N1Op0VT = N1->getOperand(0).getValueType();
14891
14892    // Always fold no-op FP casts.
14893    if (N1VT == N1Op0VT)
14894      return true;
14895
14896    // Do not optimize out type conversion of f128 type yet.
14897    // For some targets like x86_64, configuration is changed to keep one f128
14898    // value in one SSE register, but instruction selection cannot handle
14899    // FCOPYSIGN on SSE registers yet.
14900    if (N1Op0VT == MVT::f128)
14901      return false;
14902
14903    // Avoid mismatched vector operand types, for better instruction selection.
14904    if (N1Op0VT.isVector())
14905      return false;
14906
14907    return true;
14908  }
  // Sign operand is not an FP cast; nothing to strip.
14909  return false;
14910}
14911
// Combine an ISD::FCOPYSIGN node: constant folding, constant sign operand ->
// fabs/fneg(fabs), stripping sign-irrelevant wrappers off the magnitude
// operand, and simplifying the sign operand through fabs/copysign/FP casts.
// Returns empty SDValue when no combine applied.
14912SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14913  SDValue N0 = N->getOperand(0);
14914  SDValue N1 = N->getOperand(1);
14915  EVT VT = N->getValueType(0);
14916
14917  // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
14918  if (SDValue C =
14920    return C;
14921
14922  if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14923    const APFloat &V = N1C->getValueAPF();
14924    // copysign(x, c1) -> fabs(x) iff ispos(c1)
14925    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14926    if (!V.isNegative()) {
14927      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14928        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14929    } else {
14930      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14931        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14932                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14933    }
14934  }
14935
14936  // copysign(fabs(x), y) -> copysign(x, y)
14937  // copysign(fneg(x), y) -> copysign(x, y)
14938  // copysign(copysign(x,z), y) -> copysign(x, y)
  // The sign of the magnitude operand is overwritten anyway, so any
  // sign-only operation wrapped around it is dead.
14939  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14940      N0.getOpcode() == ISD::FCOPYSIGN)
14941    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14942
14943  // copysign(x, abs(y)) -> abs(x)
  // fabs(y) is known non-negative, so the copied sign bit is always clear.
14944  if (N1.getOpcode() == ISD::FABS)
14945    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14946
14947  // copysign(x, copysign(y,z)) -> copysign(x, z)
14948  if (N1.getOpcode() == ISD::FCOPYSIGN)
14949    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14950
14951  // copysign(x, fp_extend(y)) -> copysign(x, y)
14952  // copysign(x, fp_round(y)) -> copysign(x, y)
14954    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14955
14956  return SDValue();
14957}
14958
// Lower pow(x, c) with special constant exponents into cheaper sequences:
// c == 1/3 -> cbrt(x) (when the target's libm has cbrt), and c == 0.25/0.75
// -> sqrt chains. Both rewrites require specific fast-math flags (see below).
14959SDValue DAGCombiner::visitFPOW(SDNode *N) {
14961 if (!ExponentC)
14962 return SDValue();
14964
14965 // Try to convert x ** (1/3) into cube root.
14966 // TODO: Handle the various flavors of long double.
14967 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14968 // Some range near 1/3 should be fine.
14969 EVT VT = N->getValueType(0);
14970 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14971 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14972 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14973 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14974 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
14975 // For regular numbers, rounding may cause the results to differ.
14976 // Therefore, we require { nsz ninf nnan afn } for this transform.
14977 // TODO: We could select out the special cases if we don't have nsz/ninf.
14978 SDNodeFlags Flags = N->getFlags();
14979 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14980 !Flags.hasApproximateFuncs())
14981 return SDValue();
14982
14983 // Do not create a cbrt() libcall if the target does not have it, and do not
14984 // turn a pow that has lowering support into a cbrt() libcall.
14985 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14988 return SDValue();
14989
14990 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14991 }
14992
14993 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14994 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14995 // TODO: This could be extended (using a target hook) to handle smaller
14996 // power-of-2 fractional exponents.
14997 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14998 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
15000 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
15001 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
15002 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
15003 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
15004 // For regular numbers, rounding may cause the results to differ.
15005 // Therefore, we require { nsz ninf afn } for this transform.
15006 // TODO: We could select out the special cases if we don't have nsz/ninf.
15007 SDNodeFlags Flags = N->getFlags();
15008
15009 // We only need no signed zeros for the 0.25 case.
15010 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
15011 !Flags.hasApproximateFuncs())
15012 return SDValue();
15013
15014 // Don't double the number of libcalls. We are trying to inline fast code.
15016 return SDValue();
15017
15018 // Assume that libcalls are the smallest code.
15019 // TODO: This restriction should probably be lifted for vectors.
15020 if (ForCodeSize)
15021 return SDValue();
15022
15023 // pow(X, 0.25) --> sqrt(sqrt(X))
15024 SDLoc DL(N);
15025 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
15027 if (ExponentIs025)
15028 return SqrtSqrt;
15029 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
15030 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
15031 }
15032
15033 return SDValue();
15034}
15035
// Fold an FP -> int -> FP round trip into a single FTRUNC:
// [us]itofp (fpto[us]i X) --> ftrunc X, when FTRUNC is legal for the type.
15037 const TargetLowering &TLI) {
15038 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
15039 // replacing casts with a libcall. We also must be allowed to ignore -0.0
15040 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
15041 // conversions would return +0.0.
15042 // FIXME: We should be able to use node-level FMF here.
15043 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
15044 EVT VT = N->getValueType(0);
15045 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
15047 return SDValue();
15048
15049 // fptosi/fptoui round towards zero, so converting from FP to integer and
15050 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
15051 SDValue N0 = N->getOperand(0);
15052 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
15053 N0.getOperand(0).getValueType() == VT)
15054 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15055
15056 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
15057 N0.getOperand(0).getValueType() == VT)
15058 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15059
15060 return SDValue();
15061}
15062
// Combines for SINT_TO_FP: fold undef/constant inputs, switch to UINT_TO_FP
// when the sign bit is known zero, turn boolean setcc inputs into FP selects,
// and collapse int<->FP round trips via foldFPToIntToFP.
15063SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
15064 SDValue N0 = N->getOperand(0);
15065 EVT VT = N->getValueType(0);
15066 EVT OpVT = N0.getValueType();
15067
15068 // [us]itofp(undef) = 0, because the result value is bounded.
15069 if (N0.isUndef())
15070 return DAG.getConstantFP(0.0, SDLoc(N), VT);
15071
15072 // fold (sint_to_fp c1) -> c1fp
15074 // ...but only if the target supports immediate floating-point values
15075 (!LegalOperations ||
15077 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15078
15079 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
15080 // but UINT_TO_FP is legal on this target, try to convert.
15083 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
15084 if (DAG.SignBitIsZero(N0))
15085 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15086 }
15087
15088 // The next optimizations are desirable only if SELECT_CC can be lowered.
15089 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
// An i1 true is the signed value -1, hence -1.0 on the taken branch.
15090 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
15091 !VT.isVector() &&
15092 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15093 SDLoc DL(N);
15094 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
15095 DAG.getConstantFP(0.0, DL, VT));
15096 }
15097
15098 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
15099 // (select (setcc x, y, cc), 1.0, 0.0)
15100 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
15101 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
15102 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15103 SDLoc DL(N);
15104 return DAG.getSelect(DL, VT, N0.getOperand(0),
15105 DAG.getConstantFP(1.0, DL, VT),
15106 DAG.getConstantFP(0.0, DL, VT));
15107 }
15108
15109 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15110 return FTrunc;
15111
15112 return SDValue();
15113}
15114
// Combines for UINT_TO_FP: mirror of visitSINT_TO_FP — fold undef/constant
// inputs, switch to SINT_TO_FP when the sign bit is known zero, turn setcc
// inputs into FP selects, and collapse int<->FP round trips.
15115SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
15116 SDValue N0 = N->getOperand(0);
15117 EVT VT = N->getValueType(0);
15118 EVT OpVT = N0.getValueType();
15119
15120 // [us]itofp(undef) = 0, because the result value is bounded.
15121 if (N0.isUndef())
15122 return DAG.getConstantFP(0.0, SDLoc(N), VT);
15123
15124 // fold (uint_to_fp c1) -> c1fp
15126 // ...but only if the target supports immediate floating-point values
15127 (!LegalOperations ||
15129 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15130
15131 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
15132 // but SINT_TO_FP is legal on this target, try to convert.
15135 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
15136 if (DAG.SignBitIsZero(N0))
15137 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15138 }
15139
15140 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
15141 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
15142 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15143 SDLoc DL(N);
15144 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
15145 DAG.getConstantFP(0.0, DL, VT));
15146 }
15147
15148 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15149 return FTrunc;
15150
15151 return SDValue();
15152}
15153
15154// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
// The round trip through FP is exact whenever the narrower of the two integer
// ranges fits in the FP type's mantissa; the conversion then reduces to a
// plain integer extension/truncation (or a no-op bitcast for equal widths).
15156 SDValue N0 = N->getOperand(0);
15157 EVT VT = N->getValueType(0);
15158
15159 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
15160 return SDValue();
15161
15162 SDValue Src = N0.getOperand(0);
15163 EVT SrcVT = Src.getValueType();
15165 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
15166
15167 // We can safely assume the conversion won't overflow the output range,
15168 // because (for example) (uint8_t)18293.f is undefined behavior.
15169
15170 // Since we can assume the conversion won't overflow, our decision as to
15171 // whether the input will fit in the float should depend on the minimum
15172 // of the input range and output range.
15173
15174 // This means this is also safe for a signed input and unsigned output, since
15175 // a negative input would lead to undefined behavior.
// Subtracting the signed-ness discounts the sign bit from the magnitude width.
15176 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
15177 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
15178 unsigned ActualSize = std::min(InputSize, OutputSize);
15180
15181 // We can only fold away the float conversion if the input range can be
15182 // represented exactly in the float range.
15184 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
15187 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
15188 }
15189 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
15190 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
// Same width: the value is unchanged, only the nominal type differs.
15191 return DAG.getBitcast(VT, Src);
15192 }
15193 return SDValue();
15194}
15195
// Combines for FP_TO_SINT: propagate undef, constant fold, and try the
// int->FP->int round-trip elimination.
15196SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
15197 SDValue N0 = N->getOperand(0);
15198 EVT VT = N->getValueType(0);
15199
15200 // fold (fp_to_sint undef) -> undef
15201 if (N0.isUndef())
15202 return DAG.getUNDEF(VT);
15203
15204 // fold (fp_to_sint c1fp) -> c1
15206 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
15207
15208 return FoldIntToFPToInt(N, DAG);
15209}
15210
// Combines for FP_TO_UINT: unsigned counterpart of visitFP_TO_SINT.
15211SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
15212 SDValue N0 = N->getOperand(0);
15213 EVT VT = N->getValueType(0);
15214
15215 // fold (fp_to_uint undef) -> undef
15216 if (N0.isUndef())
15217 return DAG.getUNDEF(VT);
15218
15219 // fold (fp_to_uint c1fp) -> c1
15221 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
15222
15223 return FoldIntToFPToInt(N, DAG);
15224}
15225
// Combines for FP_ROUND. Operand 1 is the "trunc" flag: 1 means the round is
// known to be value-preserving (no bits of precision are lost).
15226SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
15227 SDValue N0 = N->getOperand(0);
15228 SDValue N1 = N->getOperand(1);
15230 EVT VT = N->getValueType(0);
15231
15232 // fold (fp_round c1fp) -> c1fp
15233 if (N0CFP)
15234 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
15235
15236 // fold (fp_round (fp_extend x)) -> x
15237 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
15238 return N0.getOperand(0);
15239
15240 // fold (fp_round (fp_round x)) -> (fp_round x)
15241 if (N0.getOpcode() == ISD::FP_ROUND) {
15242 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
15243 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
15244
15245 // Skip this folding if it results in an fp_round from f80 to f16.
15246 //
15247 // f80 to f16 always generates an expensive (and as yet, unimplemented)
15248 // libcall to __truncxfhf2 instead of selecting native f16 conversion
15249 // instructions from f32 or f64. Moreover, the first (value-preserving)
15250 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
15251 // x86.
15252 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
15253 return SDValue();
15254
15255 // If the first fp_round isn't a value preserving truncation, it might
15256 // introduce a tie in the second fp_round, that wouldn't occur in the
15257 // single-step fp_round we want to fold to.
15258 // In other words, double rounding isn't the same as rounding.
15259 // Also, this is a value preserving truncation iff both fp_round's are.
15260 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
15261 SDLoc DL(N);
15262 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
15264 }
15265 }
15266
// Rounding only the magnitude operand is safe: the sign is re-applied after.
15267 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
15268 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
15269 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
15270 N0.getOperand(0), N1);
15271 AddToWorklist(Tmp.getNode());
15272 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
15273 Tmp, N0.getOperand(1));
15274 }
15275
15277 return NewVSel;
15278
15279 return SDValue();
15280}
15281
// Combines for FP_EXTEND: constant fold, look through value-preserving
// rounds, widen FP16_TO_FP directly, and convert fpext-of-load into an
// extending load.
15282SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
15283 SDValue N0 = N->getOperand(0);
15284 EVT VT = N->getValueType(0);
15285
15286 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
15287 if (N->hasOneUse() &&
15288 N->use_begin()->getOpcode() == ISD::FP_ROUND)
15289 return SDValue();
15290
15291 // fold (fp_extend c1fp) -> c1fp
15293 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
15294
15295 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
15296 if (N0.getOpcode() == ISD::FP16_TO_FP &&
15298 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
15299
15300 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
15301 // value of X.
15302 if (N0.getOpcode() == ISD::FP_ROUND
15303 && N0.getConstantOperandVal(1) == 1) {
15304 SDValue In = N0.getOperand(0);
15305 if (In.getValueType() == VT) return In;
15306 if (VT.bitsLT(In.getValueType()))
15307 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
15308 In, N0.getOperand(1));
15309 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
15310 }
15311
15312 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
15313 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15317 LN0->getChain(),
15318 LN0->getBasePtr(), N0.getValueType(),
15319 LN0->getMemOperand());
15320 CombineTo(N, ExtLoad);
// Re-materialize the original narrow value for the load's other users by
// rounding the extended load back down, and forward the load's chain.
15321 CombineTo(N0.getNode(),
15322 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
15323 N0.getValueType(), ExtLoad,
15324 DAG.getIntPtrConstant(1, SDLoc(N0))),
15325 ExtLoad.getValue(1));
15326 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15327 }
15328
15330 return NewVSel;
15331
15332 return SDValue();
15333}
15334
// Combine for FCEIL: constant folding only.
15335SDValue DAGCombiner::visitFCEIL(SDNode *N) {
15336 SDValue N0 = N->getOperand(0);
15337 EVT VT = N->getValueType(0);
15338
15339 // fold (fceil c1) -> fceil(c1)
15341 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
15342
15343 return SDValue();
15344}
15345
// Combines for FTRUNC: constant fold, and drop the trunc entirely when the
// input is already known to be an integral FP value.
15346SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
15347 SDValue N0 = N->getOperand(0);
15348 EVT VT = N->getValueType(0);
15349
15350 // fold (ftrunc c1) -> ftrunc(c1)
15352 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
15353
15354 // fold ftrunc (known rounded int x) -> x
15355 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
15356 // likely to be generated to extract integer from a rounded floating value.
15357 switch (N0.getOpcode()) {
15358 default: break;
15359 case ISD::FRINT:
15360 case ISD::FTRUNC:
15361 case ISD::FNEARBYINT:
15362 case ISD::FFLOOR:
15363 case ISD::FCEIL:
15364 return N0;
15365 }
15366
15367 return SDValue();
15368}
15369
// Combine for FFLOOR: constant folding only.
15370SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
15371 SDValue N0 = N->getOperand(0);
15372 EVT VT = N->getValueType(0);
15373
15374 // fold (ffloor c1) -> ffloor(c1)
15376 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
15377
15378 return SDValue();
15379}
15380
// Combines for FNEG: constant fold, push the negation into the operand via
// TLI.getNegatedExpression, and fold -(X-Y) -> (Y-X) under nsz.
15381SDValue DAGCombiner::visitFNEG(SDNode *N) {
15382 SDValue N0 = N->getOperand(0);
15383 EVT VT = N->getValueType(0);
15385
15386 // Constant fold FNEG.
15388 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
15389
15390 if (SDValue NegN0 =
15391 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
15392 return NegN0;
15393
15394 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
15395 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
15396 // know it was called from a context with a nsz flag if the input fsub does
15397 // not.
15398 if (N0.getOpcode() == ISD::FSUB &&
15400 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
15401 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
15402 N0.getOperand(0));
15403 }
15404
15406 return Cast;
15407
15408 return SDValue();
15409}
15410
// Shared combine for FMINNUM/FMAXNUM/FMINIMUM/FMAXIMUM: constant fold,
// canonicalize a constant operand to the RHS, and simplify against NaN and
// (+/-)infinity constants. FMINIMUM/FMAXIMUM propagate NaN operands;
// FMINNUM/FMAXNUM return the non-NaN operand instead.
15411SDValue DAGCombiner::visitFMinMax(SDNode *N) {
15412 SDValue N0 = N->getOperand(0);
15413 SDValue N1 = N->getOperand(1);
15414 EVT VT = N->getValueType(0);
15415 const SDNodeFlags Flags = N->getFlags();
15416 unsigned Opc = N->getOpcode();
15417 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15418 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15420
15421 // Constant fold.
15422 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
15423 return C;
15424
15425 // Canonicalize to constant on RHS.
15428 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15429
15431 const APFloat &AF = N1CFP->getValueAPF();
15432
15433 // minnum(X, nan) -> X
15434 // maxnum(X, nan) -> X
15435 // minimum(X, nan) -> nan
15436 // maximum(X, nan) -> nan
15437 if (AF.isNaN())
15438 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15439
15440 // In the following folds, inf can be replaced with the largest finite
15441 // float, if the ninf flag is set.
15442 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15443 // minnum(X, -inf) -> -inf
15444 // maxnum(X, +inf) -> +inf
15445 // minimum(X, -inf) -> -inf if nnan
15446 // maximum(X, +inf) -> +inf if nnan
15447 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15448 return N->getOperand(1);
15449
15450 // minnum(X, +inf) -> X if nnan
15451 // maxnum(X, -inf) -> X if nnan
15452 // minimum(X, +inf) -> X
15453 // maximum(X, -inf) -> X
15454 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15455 return N->getOperand(0);
15456 }
15457 }
15458
15459 return SDValue();
15460}
15461
// Combines for FABS: constant fold, and strip sign-only operations
// (fabs/fneg/fcopysign) from the operand, since fabs discards the sign.
15462SDValue DAGCombiner::visitFABS(SDNode *N) {
15463 SDValue N0 = N->getOperand(0);
15464 EVT VT = N->getValueType(0);
15465
15466 // fold (fabs c1) -> fabs(c1)
15468 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15469
15470 // fold (fabs (fabs x)) -> (fabs x)
15471 if (N0.getOpcode() == ISD::FABS)
15472 return N->getOperand(0);
15473
15474 // fold (fabs (fneg x)) -> (fabs x)
15475 // fold (fabs (fcopysign x, y)) -> (fabs x)
15476 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15477 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15478
15480 return Cast;
15481
15482 return SDValue();
15483}
15484
// Combines for BRCOND (operands: Chain, condition, destination block):
// look through FREEZE, fold a setcc condition into BR_CC when legal, and
// otherwise try to rebuild the condition as an explicit SETCC.
15485SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15486 SDValue Chain = N->getOperand(0);
15487 SDValue N1 = N->getOperand(1);
15488 SDValue N2 = N->getOperand(2);
15489
15490 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15491 // nondeterministic jumps).
15492 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15493 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15494 N1->getOperand(0), N2);
15495 }
15496
15497 // If N is a constant we could fold this into a fallthrough or unconditional
15498 // branch. However that doesn't happen very often in normal code, because
15499 // Instcombine/SimplifyCFG should have handled the available opportunities.
15500 // If we did this folding here, it would be necessary to update the
15501 // MachineBasicBlock CFG, which is awkward.
15502
15503 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15504 // on the target.
15505 if (N1.getOpcode() == ISD::SETCC &&
15507 N1.getOperand(0).getValueType())) {
15508 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15509 Chain, N1.getOperand(2),
15510 N1.getOperand(0), N1.getOperand(1), N2);
15511 }
15512
15513 if (N1.hasOneUse()) {
15514 // rebuildSetCC calls visitXor which may change the Chain when there is a
15515 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15518 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15519 ChainHandle.getValue(), NewN1, N2);
15520 }
15521
15522 return SDValue();
15523}
15524
15525SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15526 if (N.getOpcode() == ISD::SRL ||
15527 (N.getOpcode() == ISD::TRUNCATE &&
15528 (N.getOperand(0).hasOneUse() &&
15529 N.getOperand(0).getOpcode() == ISD::SRL))) {
15530 // Look pass the truncate.
15531 if (N.getOpcode() == ISD::TRUNCATE)
15532 N = N.getOperand(0);
15533
15534 // Match this pattern so that we can generate simpler code:
15535 //
15536 // %a = ...
15537 // %b = and i32 %a, 2
15538 // %c = srl i32 %b, 1
15539 // brcond i32 %c ...
15540 //
15541 // into
15542 //
15543 // %a = ...
15544 // %b = and i32 %a, 2
15545 // %c = setcc eq %b, 0
15546 // brcond %c ...
15547 //
15548 // This applies only when the AND constant value has one bit set and the
15549 // SRL constant is equal to the log2 of the AND constant. The back-end is
15550 // smart enough to convert the result into a TEST/JMP sequence.
15551 SDValue Op0 = N.getOperand(0);
15552 SDValue Op1 = N.getOperand(1);
15553
15554 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15555 SDValue AndOp1 = Op0.getOperand(1);
15556
15557 if (AndOp1.getOpcode() == ISD::Constant) {
15558 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15559
15560 if (AndConst.isPowerOf2() &&
15561 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15562 SDLoc DL(N);
15563 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15564 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15565 ISD::SETNE);
15566 }
15567 }
15568 }
15569 }
15570
15571 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15572 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15573 if (N.getOpcode() == ISD::XOR) {
15574 // Because we may call this on a speculatively constructed
15575 // SimplifiedSetCC Node, we need to simplify this node first.
15576 // Ideally this should be folded into SimplifySetCC and not
15577 // here. For now, grab a handle to N so we don't lose it from
15578 // replacements interal to the visit.
15580 while (N.getOpcode() == ISD::XOR) {
15581 SDValue Tmp = visitXOR(N.getNode());
15582 // No simplification done.
15583 if (!Tmp.getNode())
15584 break;
15585 // Returning N is form in-visit replacement that may invalidated
15586 // N. Grab value from Handle.
15587 if (Tmp.getNode() == N.getNode())
15588 N = XORHandle.getValue();
15589 else // Node simplified. Try simplifying again.
15590 N = Tmp;
15591 }
15592
15593 if (N.getOpcode() != ISD::XOR)
15594 return N;
15595
15596 SDValue Op0 = N->getOperand(0);
15597 SDValue Op1 = N->getOperand(1);
15598
15599 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15600 bool Equal = false;
15601 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15602 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15603 Op0.getValueType() == MVT::i1) {
15604 N = Op0;
15605 Op0 = N->getOperand(0);
15606 Op1 = N->getOperand(1);
15607 Equal = true;
15608 }
15609
15610 EVT SetCCVT = N.getValueType();
15611 if (LegalTypes)
15613 // Replace the uses of XOR with SETCC
15614 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15615 Equal ? ISD::SETEQ : ISD::SETNE);
15616 }
15617 }
15618
15619 return SDValue();
15620}
15621
15622// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15623//
// Combine for BR_CC: run SimplifySetCC on the embedded comparison and, if it
// still yields a SETCC, rebuild the BR_CC around the simplified operands.
15624SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15625 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15626 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15627
15628 // If N is a constant we could fold this into a fallthrough or unconditional
15629 // branch. However that doesn't happen very often in normal code, because
15630 // Instcombine/SimplifyCFG should have handled the available opportunities.
15631 // If we did this folding here, it would be necessary to update the
15632 // MachineBasicBlock CFG, which is awkward.
15633
15634 // Use SimplifySetCC to simplify SETCC's.
15636 CondLHS, CondRHS, CC->get(), SDLoc(N),
15637 false);
15638 if (Simp.getNode()) AddToWorklist(Simp.getNode());
15639
15640 // fold to a simpler setcc
15641 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15642 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15643 N->getOperand(0), Simp.getOperand(2),
15644 Simp.getOperand(0), Simp.getOperand(1),
15645 N->getOperand(4));
15646
15647 return SDValue();
15648}
15649
// Classify N as a plain or masked load/store, verify the target supports the
// Inc or Dec indexed form for its memory VT, and return its base pointer via
// Ptr. IsLoad/IsMasked are updated to describe the node kind (callers are
// expected to pass them in as true/false defaults).
15650static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15651 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15652 const TargetLowering &TLI) {
15653 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15654 if (LD->isIndexed())
15655 return false;
15656 EVT VT = LD->getMemoryVT();
15657 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15658 return false;
15659 Ptr = LD->getBasePtr();
15660 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15661 if (ST->isIndexed())
15662 return false;
15663 EVT VT = ST->getMemoryVT();
15664 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15665 return false;
15666 Ptr = ST->getBasePtr();
15667 IsLoad = false;
15668 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15669 if (LD->isIndexed())
15670 return false;
15671 EVT VT = LD->getMemoryVT();
15672 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15673 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15674 return false;
15675 Ptr = LD->getBasePtr();
15676 IsMasked = true;
15678 if (ST->isIndexed())
15679 return false;
15680 EVT VT = ST->getMemoryVT();
15681 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15682 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15683 return false;
15684 Ptr = ST->getBasePtr();
15685 IsLoad = false;
15686 IsMasked = true;
15687 } else {
// Not a (masked) load or store at all.
15688 return false;
15689 }
15690 return true;
15691}
15692
15693/// Try turning a load/store into a pre-indexed load/store when the base
15694/// pointer is an add or subtract and it has other uses besides the load/store.
15695/// After the transformation, the new indexed load/store has effectively folded
15696/// the add/subtract in and all of its other uses are redirected to the
15697/// new load/store.
15698bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15699 if (Level < AfterLegalizeDAG)
15700 return false;
15701
15702 bool IsLoad = true;
15703 bool IsMasked = false;
15704 SDValue Ptr;
15706 Ptr, TLI))
15707 return false;
15708
15709 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15710 // out. There is no reason to make this a preinc/predec.
15711 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15712 Ptr.getNode()->hasOneUse())
15713 return false;
15714
15715 // Ask the target to do addressing mode selection.
15719 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15720 return false;
15721
15722 // Backends without true r+i pre-indexed forms may need to pass a
15723 // constant base with a variable offset so that constant coercion
15724 // will work with the patterns in canonical form.
15725 bool Swapped = false;
15726 if (isa<ConstantSDNode>(BasePtr)) {
15727 std::swap(BasePtr, Offset);
15728 Swapped = true;
15729 }
15730
15731 // Don't create an indexed load / store with zero offset.
15733 return false;
15734
15735 // Try turning it into a pre-indexed load / store except when:
15736 // 1) The new base ptr is a frame index.
15737 // 2) If N is a store and the new base ptr is either the same as or is a
15738 // predecessor of the value being stored.
15739 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15740 // that would create a cycle.
15741 // 4) All uses are load / store ops that use it as old base ptr.
15742
15743 // Check #1. Preinc'ing a frame index would require copying the stack pointer
15744 // (plus the implicit offset) to a register to preinc anyway.
15745 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15746 return false;
15747
15748 // Check #2.
15749 if (!IsLoad) {
15750 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15751 : cast<StoreSDNode>(N)->getValue();
15752
15753 // Would require a copy.
15754 if (Val == BasePtr)
15755 return false;
15756
15757 // Would create a cycle.
15758 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15759 return false;
15760 }
15761
15762 // Caches for hasPredecessorHelper.
15765 Worklist.push_back(N);
15766
15767 // If the offset is a constant, there may be other adds of constants that
15768 // can be folded with this one. We should do this to avoid having to keep
15769 // a copy of the original base pointer.
15772 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15773 UE = BasePtr.getNode()->use_end();
15774 UI != UE; ++UI) {
15775 SDUse &Use = UI.getUse();
15776 // Skip the use that is Ptr and uses of other results from BasePtr's
15777 // node (important for nodes that return multiple results).
15778 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15779 continue;
15780
// A use that is a predecessor of N can't be rewritten to use the indexed
// result without creating a cycle, so just skip it here.
15781 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15782 continue;
15783
// Only constant add/sub users of the same offset type can be rewritten;
// any other user kind makes the whole rewrite unprofitable, so give up.
15784 if (Use.getUser()->getOpcode() != ISD::ADD &&
15785 Use.getUser()->getOpcode() != ISD::SUB) {
15786 OtherUses.clear();
15787 break;
15788 }
15789
15790 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15791 if (!isa<ConstantSDNode>(Op1)) {
15792 OtherUses.clear();
15793 break;
15794 }
15795
15796 // FIXME: In some cases, we can be smarter about this.
15797 if (Op1.getValueType() != Offset.getValueType()) {
15798 OtherUses.clear();
15799 break;
15800 }
15801
15802 OtherUses.push_back(Use.getUser());
15803 }
15804
15805 if (Swapped)
15806 std::swap(BasePtr, Offset);
15807
15808 // Now check for #3 and #4.
15809 bool RealUse = false;
15810
15811 for (SDNode *Use : Ptr.getNode()->uses()) {
15812 if (Use == N)
15813 continue;
15814 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15815 return false;
15816
15817 // If Ptr may be folded in addressing mode of other use, then it's
15818 // not profitable to do this transformation.
15819 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15820 RealUse = true;
15821 }
15822
15823 if (!RealUse)
15824 return false;
15825
// Build the replacement indexed node for whichever of the four kinds N is.
15827 if (!IsMasked) {
15828 if (IsLoad)
15829 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15830 else
15831 Result =
15832 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15833 } else {
15834 if (IsLoad)
15835 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15836 Offset, AM);
15837 else
15838 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15839 Offset, AM);
15840 }
15842 ++NodesCombined;
15843 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15844 Result.getNode()->dump(&DAG); dbgs() << '\n');
15845 WorklistRemover DeadNodes(*this);
// An indexed load has results (value, new base, chain); an indexed store
// has (new base, chain) — hence the differing result indices below.
15846 if (IsLoad) {
15847 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15848 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15849 } else {
15850 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15851 }
15852
15853 // Finally, since the node is now dead, remove it from the graph.
15855
15856 if (Swapped)
15857 std::swap(BasePtr, Offset);
15858
15859 // Replace other uses of BasePtr that can be updated to use Ptr
15860 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15861 unsigned OffsetIdx = 1;
15862 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15863 OffsetIdx = 0;
15864 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15865 BasePtr.getNode() && "Expected BasePtr operand");
15866
15867 // We need to replace ptr0 in the following expression:
15868 // x0 * offset0 + y0 * ptr0 = t0
15869 // knowing that
15870 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15871 //
15872 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15873 // indexed load/store and the expression that needs to be re-written.
15874 //
15875 // Therefore, we have:
15876 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
15877
15878 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15879 const APInt &Offset0 = CN->getAPIntValue();
15880 const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
// Signs: x/y are -1 only where a SUB (or PRE_DEC) negates that term.
15881 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15882 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15883 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15884 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15885
15886 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15887
15888 APInt CNV = Offset0;
15889 if (X0 < 0) CNV = -CNV;
15890 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15891 else CNV = CNV - Offset1;
15892
15893 SDLoc DL(OtherUses[i]);
15894
15895 // We can now generate the new expression.
15896 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15897 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15898
15899 SDValue NewUse = DAG.getNode(Opcode,
15900 DL,
15904 }
15905
15906 // Replace the uses of Ptr with uses of the updated base value.
15907 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15909 AddToWorklist(Result.getNode());
15910
15911 return true;
15912}
15913
/// Decide whether the pointer arithmetic node \p PtrUse (an ADD/SUB user of
/// the pointer produced by the memory op \p N) can be folded into \p N as a
/// post-indexed addressing update. On success the target hook fills in
/// \p BasePtr and \p Offset. Returns false whenever folding would be
/// unprofitable or would create a cycle in the DAG.
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
                                   SDValue &BasePtr, SDValue &Offset,
                                   SelectionDAG &DAG,
                                   const TargetLowering &TLI) {
  // Only a pointer increment/decrement that is distinct from N itself can
  // become a post-indexed update of N.
  if (PtrUse == N ||
      (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
    return false;

  // Ask the target to split PtrUse into a base and an offset it can encode.
  if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
    return false;

  // Don't create an indexed load / store with zero offset.
    return false;

  // Folding an update into a frame index or a physical/virtual register
  // base is not supported.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  for (SDNode *Use : BasePtr.getNode()->uses()) {
    if (Use == Ptr.getNode())
      continue;

    // Don't do this if there's a later user which could perform the index
    // instead.
    if (isa<MemSDNode>(Use)) {
      bool IsLoad = true;
      bool IsMasked = false;
                                   IsMasked, OtherPtr, TLI)) {
        Worklist.push_back(Use);
        // Bail out if folding would make N reachable from its own user,
        // i.e. it would create a cycle.
        if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
          return false;
      }
    }

    // If all the uses are load / store addresses, then don't do the
    // transformation.
    if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
      for (SDNode *UseUse : Use->uses())
        if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
          return false;
    }
  }
  return true;
}
15962
                                         bool &IsMasked, SDValue &Ptr,
                                         SDValue &BasePtr, SDValue &Offset,
                                         SelectionDAG &DAG,
                                         const TargetLowering &TLI) {
  // Decompose N into its pointer operand; a pointer with a single use has
  // no separate add/sub user that could become a post-increment.
                                IsMasked, Ptr, TLI) ||
      Ptr.getNode()->hasOneUse())
    return nullptr;

  // Try turning it into a post-indexed load / store except when
  // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded as addressing mode).
  // 2) Op must be independent of N, i.e. Op is neither a predecessor
  //    nor a successor of N. Otherwise, if Op is folded that would
  //    create a cycle.
  for (SDNode *Op : Ptr->uses()) {
    // Check for #1.
    if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
      continue;

    // Check for #2.
    // Ptr is predecessor to both N and Op.
    Visited.insert(Ptr.getNode());
    Worklist.push_back(N);
    Worklist.push_back(Op);
    // Folding is safe only if neither node is reachable from the other.
    if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
        !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
      return Op;
  }
  return nullptr;
}
15998
/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folds the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to
/// the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Post-indexed forms are only introduced after full DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  // Find an ADD/SUB user of N's pointer that the target can fold into N as a
  // post-increment/decrement; IsLoad/IsMasked describe what kind of node N is.
  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
                                         Offset, AM, DAG, TLI);
  if (!Op)
    return false;

  // Build the indexed node. For loads it produces (value, updated pointer,
  // chain); for stores it produces (updated pointer, chain).
  if (!IsMasked) {
    if (IsLoad)
      Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
    else
      Result =
          DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
  } else {
    if (IsLoad)
      Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
                                        Offset, AM);
    else
      Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
                                         Offset, AM);
  }
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
             dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
             dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (IsLoad) {
    // Value and chain results of N map to results 0 and 2 of the indexed load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.

  // Replace the uses of Use with uses of the updated base value.
                                Result.getValue(IsLoad ? 1 : 0));
  return true;
}
16051
/// Return the base-pointer arithmetic from an indexed \p LD, as a standalone
/// ADD/SUB node that computes the updated pointer value.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  // For an indexed load, operand 1 is the base pointer and operand 2 is the
  // increment.
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
                          ConstInc->getValueType(0));
  }

  // Pre/post-increment modes add the offset; the decrement modes subtract it.
  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
16075
  // Vector types report their element count; scalar types count as zero
  // elements, so two scalars always compare equal on element count.
  return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
}
16079
/// Produce in \p Val the value that \p ST actually writes to memory, i.e.
/// the stored operand narrowed to the store's memory type. Returns true on
/// success (Val updated), false when no suitable narrowing exists.
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
  Val = ST->getValue();
  EVT STType = Val.getValueType();
  EVT STMemType = ST->getMemoryVT();
  // Non-truncating store: the value already has the in-memory type.
  if (STType == STMemType)
    return true;
    return false; // fail.
  // Narrowing FP store: emit an FTRUNC node of the memory type.
  if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
    Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
    return true;
  }
  // Narrowing integer store: TRUNCATE down to the memory type.
      STType.isInteger() && STMemType.isInteger()) {
    Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
    return true;
  }
  // Same size, different type: a plain bitcast suffices.
  if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
    Val = DAG.getBitcast(STMemType, Val);
    return true;
  }
  return false; // fail.
}
16104
16105bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
16106 EVT LDMemType = LD->getMemoryVT();
16107 EVT LDType = LD->getValueType(0);
16108 assert(Val.getValueType() == LDMemType &&
16109 "Attempting to extend value of non-matching type");
16110 if (LDType == LDMemType)
16111 return true;
16112 if (LDMemType.isInteger() && LDType.isInteger()) {
16113 switch (LD->getExtensionType()) {
16114 case ISD::NON_EXTLOAD:
16115 Val = DAG.getBitcast(LDType, Val);
16116 return true;
16117 case ISD::EXTLOAD:
16118 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
16119 return true;
16120 case ISD::SEXTLOAD:
16121 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
16122 return true;
16123 case ISD::ZEXTLOAD:
16124 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
16125 return true;
16126 }
16127 }
16128 return false;
16129}
16130
/// If \p LD reads memory that the immediately preceding store on its chain
/// wrote, forward the stored value directly to the load's users (possibly
/// with a truncate/extend or an AND mask) and bypass the memory round-trip.
/// Returns the replacement value, or an empty SDValue on failure.
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
  if (OptLevel == CodeGenOpt::None || !LD->isSimple())
    return SDValue();
  // Only handle the case where the load's chain input is directly the store.
  SDValue Chain = LD->getOperand(0);
  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
  // TODO: Relax this restriction for unordered atomics (see D66309)
  if (!ST || !ST->isSimple())
    return SDValue();

  EVT LDType = LD->getValueType(0);
  EVT LDMemType = LD->getMemoryVT();
  EVT STMemType = ST->getMemoryVT();
  EVT STType = ST->getValue().getValueType();

  // There are two cases to consider here:
  //  1. The store is fixed width and the load is scalable. In this case we
  //     don't know at compile time if the store completely envelops the load
  //     so we abandon the optimisation.
  //  2. The store is scalable and the load is fixed width. We could
  //     potentially support a limited number of cases here, but there has been
  //     no cost-benefit analysis to prove it's worth it.
  bool LdStScalable = LDMemType.isScalableVector();
  if (LdStScalable != STMemType.isScalableVector())
    return SDValue();

  // If we are dealing with scalable vectors on a big endian platform the
  // calculation of offsets below becomes trickier, since we do not know at
  // compile time the absolute size of the vector. Until we've done more
  // analysis on big-endian platforms it seems better to bail out for now.
  if (LdStScalable && DAG.getDataLayout().isBigEndian())
    return SDValue();

  int64_t Offset;
  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
    return SDValue();

  // Normalize for Endianness. After this Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value). With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
  if (DAG.getDataLayout().isBigEndian())
    Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
              (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
                 8 -
             Offset;

  // Check that the stored value covers all bits that are loaded.
  bool STCoversLD;

  TypeSize LdMemSize = LDMemType.getSizeInBits();
  TypeSize StMemSize = STMemType.getSizeInBits();
  if (LdStScalable)
    STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
  else
    STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
                                   StMemSize.getFixedSize());

  // Helper that performs the actual replacement, preserving the pointer
  // update result for indexed loads.
  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
    if (LD->isIndexed()) {
      // Cannot handle opaque target constants and we must respect the user's
      // request not to split indexes from loads.
      if (!canSplitIdx(LD))
        return SDValue();
      SDValue Ops[] = {Val, Idx, Chain};
      return CombineTo(LD, Ops, 3);
    }
    return CombineTo(LD, Val, Chain);
  };

  if (!STCoversLD)
    return SDValue();

  // Memory as copy space (potentially masked).
  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
    // Simple case: Direct non-truncating forwarding
    if (LDType.getSizeInBits() == LdMemSize)
      return ReplaceLd(LD, ST->getValue(), Chain);
    // Can we model the truncate and extension with an and mask?
    if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
        !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
      // Mask to size of LDMemType
      auto Mask =
          DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
                                               StMemSize.getFixedSize()),
                          SDLoc(ST), STType);
      auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
      return ReplaceLd(LD, Val, Chain);
    }
  }

  // TODO: Deal with nonzero offset.
  if (LD->getBasePtr().isUndef() || Offset != 0)
    return SDValue();
  // Model necessary truncations / extensions.
  SDValue Val;
  // Truncate Value To Stored Memory Size.
  // The do/while(false) lets each failed step fall through to the cleanup
  // below via `continue`.
  do {
    if (!getTruncatedStoreValue(ST, Val))
      continue;
    if (!isTypeLegal(LDMemType))
      continue;
    if (STMemType != LDMemType) {
      // TODO: Support vectors? This requires extract_subvector/bitcast.
      if (!STMemType.isVector() && !LDMemType.isVector() &&
          STMemType.isInteger() && LDMemType.isInteger())
        Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
      else
        continue;
    }
    if (!extendLoadedValueToExtension(LD, Val))
      continue;
    return ReplaceLd(LD, Val, Chain);
  } while (false);

  // On failure, cleanup dead nodes we may have created.
  if (Val->use_empty())
  return SDValue();
}
16253
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  // TODO: Allow this for unordered atomics (see D66309)
  if (LD->isSimple()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())

        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool CanSplitIdx = canSplitIdx(LD);

      // Dead value result: replace it with undef, and keep the pointer
      // update alive as separate arithmetic only when it is still used.
      if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
    if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
      if (*Alignment > LD->getAlign() &&
          isAligned(*Alignment, LD->getSrcValueOffset())) {
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(LD, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
16377
16378namespace {
16379
16380/// Helper structure used to slice a load in smaller loads.
16381/// Basically a slice is obtained from the following sequence:
16382/// Origin = load Ty1, Base
16383/// Shift = srl Ty1 Origin, CstTy Amount
16384/// Inst = trunc Shift to Ty2
16385///
16386/// Then, it will be rewritten into:
16387/// Slice = load SliceTy, Base + SliceOffset
16388/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
16389///
16390/// SliceTy is deduced from the number of bits that are actually used to
16391/// build Inst.
struct LoadedSlice {
  /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize = false;

    /// Various cost.
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}

    /// Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      // A zext is needed when the slice is loaded narrower than its final
      // type and the target cannot extend for free.
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows growing the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// Get the alignment of the load used for this slice.
  Align getAlign() const {
    Align Alignment = Origin->getAlign();
    // A non-zero byte offset within the original load may reduce the
    // alignment available for the sliced load.
    if (Offset != 0)
      Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
    return Alignment;
  }

  /// Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
      return false;

    // 3. Check that the computation is legal.
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
      return false;

    return true;
  }

  /// Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
           "Invalid shift amount for given loaded size");
    if (IsBigEndian)
    return Offset;
  }

  /// Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  ///      object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
    // The slice must feed exactly one user, and that user must be a bitcast
    // into another register bank.
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC =
        TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
                           Use->getOperand(0)->isDivergent());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment / fast memory access constraint.
    bool IsFast = false;
    if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
                                Origin->getAddressSpace(), getAlign(),
                                Origin->getMemOperand()->getFlags(), &IsFast) ||
        !IsFast)
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
16686
16687} // end anonymous namespace
16688
16689/// Check that all bits set in \p UsedBits form a dense region, i.e.,
16690/// \p UsedBits looks like 0..0 1..1 0..0.
16691static bool areUsedBitsDense(const APInt &UsedBits) {
16692 // If all the bits are one, this is dense!
16693 if (UsedBits.isAllOnes())
16694 return true;
16695
16696 // Get rid of the unused bits on the right.
16697 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16698 // Get rid of the unused bits on the left.
16699 if (NarrowedUsedBits.countLeadingZeros())
16700 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16701 // Check that the chunk of bits is completely used.
16702 return NarrowedUsedBits.isAllOnes();
16703}
16704
16705/// Check whether or not \p First and \p Second are next to each other
16706/// in memory. This means that there is no hole between the bits loaded
16707/// by \p First and the bits loaded by \p Second.
16708static bool areSlicesNextToEachOther(const LoadedSlice &First,
16709 const LoadedSlice &Second) {
16710 assert(First.Origin == Second.Origin && First.Origin &&
16711 "Unable to match different memory origins.");
16712 APInt UsedBits = First.getUsedBits();
16713 assert((UsedBits & Second.getUsedBits()) == 0 &&
16714 "Slices are not supposed to overlap.");
16715 UsedBits |= Second.getUsedBits();
16716 return areUsedBitsDense(UsedBits);
16717}
16718
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are slices in \p LoadedSlices.
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                First = Second) {

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (First->getAlign() < RequiredAlignment)
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A pairable pair found: one of the two loads is free.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
16778
16779/// Check the profitability of all involved LoadedSlice.
16780/// Currently, it is considered profitable if there is exactly two
16781/// involved slices (1) which are (2) next to each other in memory, and
16782/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16783///
16784/// Note: The order of the elements in \p LoadedSlices may be modified, but not
16785/// the elements themselves.
16786///
16787/// FIXME: When the cost model will be mature enough, we can relax
16788/// constraints (1) and (2).
16790 const APInt &UsedBits, bool ForCodeSize) {
16791 unsigned NumberOfSlices = LoadedSlices.size();
16793 return NumberOfSlices > 1;
16794
16795 // Check (1).
16796 if (NumberOfSlices != 2)
16797 return false;
16798
16799 // Check (2).
16801 return false;
16802
16803 // Check (3).
16804 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16805 // The original code has one big load.
16806 OrigCost.Loads = 1;
16807 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16808 const LoadedSlice &LS = LoadedSlices[CurrSlice];
16809 // Accumulate the cost of all the slices.
16810 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16812
16813 // Account as cost in the original configuration the gain obtained
16814 // with the current slices.
16815 OrigCost.addSliceGain(LS);
16816 }
16817
16818 // If the target supports paired load, adjust the cost accordingly.
16820 return OrigCost > GlobalSlicingCost;
16821}
16822
16823/// If the given load, \p LI, is used only by trunc or trunc(lshr)
16824/// operations, split it in the various pieces being extracted.
16825///
16826/// This sort of thing is introduced by SROA.
16827/// This slicing takes care not to insert overlapping loads.
16828/// \pre LI is a simple load (i.e., not an atomic or volatile load).
16829bool DAGCombiner::SliceUpLoad(SDNode *N) {
16830 if (Level < AfterLegalizeDAG)
16831 return false;
16832
16834 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16835 !LD->getValueType(0).isInteger())
16836 return false;
16837
16838 // The algorithm to split up a load of a scalable vector into individual
16839 // elements currently requires knowing the length of the loaded type,
16840 // so will need adjusting to work on scalable vectors.
16841 if (LD->getValueType(0).isScalableVector())
16842 return false;
16843
16844 // Keep track of already used bits to detect overlapping values.
16845 // In that case, we will just abort the transformation.
16846 APInt UsedBits(LD->getValueSizeInBits(0), 0);
16847
16849
16850 // Check if this load is used as several smaller chunks of bits.
16851 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16852 // of computation for each trunc.
16853 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16854 UI != UIEnd; ++UI) {
16855 // Skip the uses of the chain.
16856 if (UI.getUse().getResNo() != 0)
16857 continue;
16858
16859 SDNode *User = *UI;
16860 unsigned Shift = 0;
16861
16862 // Check if this is a trunc(lshr).
16863 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16865 Shift = User->getConstantOperandVal(1);
16866 User = *User->use_begin();
16867 }
16868
16869 // At this point, User is a Truncate, iff we encountered, trunc or
16870 // trunc(lshr).
16871 if (User->getOpcode() != ISD::TRUNCATE)
16872 return false;
16873
16874 // The width of the type must be a power of 2 and greater than 8-bits.
16875 // Otherwise the load cannot be represented in LLVM IR.
16876 // Moreover, if we shifted with a non-8-bits multiple, the slice
16877 // will be across several bytes. We do not support that.
16878 unsigned Width = User->getValueSizeInBits(0);
16879 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16880 return false;
16881
16882 // Build the slice for this chain of computations.
16883 LoadedSlice LS(User, LD, Shift, &DAG);
16884 APInt CurrentUsedBits = LS.getUsedBits();
16885
16886 // Check if this slice overlaps with another.
16887 if ((CurrentUsedBits & UsedBits) != 0)
16888 return false;
16889 // Update the bits used globally.
16891
16892 // Check if the new slice would be legal.
16893 if (!LS.isLegal())
16894 return false;
16895
16896 // Record the slice.
16897 LoadedSlices.push_back(LS);
16898 }
16899
16900 // Abort slicing if it does not seem to be profitable.
16901 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16902 return false;
16903
16904 ++SlicedLoads;
16905
16906 // Rewrite each chain to use an independent load.
16907 // By construction, each chain can be represented by a unique load.
16908
16909 // Prepare the argument for the new token factor for all the slices.
16911 for (const LoadedSlice &LS : LoadedSlices) {
16912 SDValue SliceInst = LS.loadSlice();
16913 CombineTo(LS.Inst, SliceInst, true);
16914 if (SliceInst.getOpcode() != ISD::LOAD)
16915 SliceInst = SliceInst.getOperand(0);
16916 assert(SliceInst->getOpcode() == ISD::LOAD &&
16917 "It takes more than a zext to get to the loaded slice!!");
16918 ArgChains.push_back(SliceInst.getValue(1));
16919 }
16920
16922 ArgChains);
16923 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16924 AddToWorklist(Chain.getNode());
16925 return true;
16926}
16927
16928/// Check to see if V is (and load (ptr), imm), where the load is having
16929/// specific bytes cleared out. If so, return the byte size being masked out
16930/// and the shift amount.
16931static std::pair<unsigned, unsigned>
16933 std::pair<unsigned, unsigned> Result(0, 0);
16934
16935 // Check for the structure we're looking for.
16936 if (V->getOpcode() != ISD::AND ||
16937 !isa<ConstantSDNode>(V->getOperand(1)) ||
16938 !ISD::isNormalLoad(V->getOperand(0).getNode()))
16939 return Result;
16940
16941 // Check the chain and pointer.
16942 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16943 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
16944
16945 // This only handles simple types.
16946 if (V.getValueType() != MVT::i16 &&
16947 V.getValueType() != MVT::i32 &&
16948 V.getValueType() != MVT::i64)
16949 return Result;
16950
16951 // Check the constant mask. Invert it so that the bits being masked out are
16952 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
16953 // follow the sign bit for uniformity.
16954 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16956 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
16958 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
16959 if (NotMaskLZ == 64) return Result; // All zero mask.
16960
16961 // See if we have a continuous run of bits. If so, we have 0*1+0*
16963 return Result;
16964
16965 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16966 if (V.getValueType() != MVT::i64 && NotMaskLZ)
16967 NotMaskLZ -= 64-V.getValueSizeInBits();
16968
16969 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16970 switch (MaskedBytes) {
16971 case 1:
16972 case 2:
16973 case 4: break;
16974 default: return Result; // All one mask, or 5-byte mask.
16975 }
16976
16977 // Verify that the first bit starts at a multiple of mask so that the access
16978 // is aligned the same as the access width.
16979 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16980
16981 // For narrowing to be valid, it must be the case that the load the
16982 // immediately preceding memory operation before the store.
16983 if (LD == Chain.getNode())
16984 ; // ok.
16985 else if (Chain->getOpcode() == ISD::TokenFactor &&
16986 SDValue(LD, 1).hasOneUse()) {
16987 // LD has only 1 chain use so they are no indirect dependencies.
16988 if (!LD->isOperandOf(Chain.getNode()))
16989 return Result;
16990 } else
16991 return Result; // Fail.
16992
16993 Result.first = MaskedBytes;
16994 Result.second = NotMaskTZ/8;
16995 return Result;
16996}
16997
16998/// Check to see if IVal is something that provides a value as specified by
16999/// MaskInfo. If so, replace the specified store with a narrower store of
17000/// truncated IVal.
17001static SDValue
17002ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
17003 SDValue IVal, StoreSDNode *St,
17004 DAGCombiner *DC) {
17005 unsigned NumBytes = MaskInfo.first;
17006 unsigned ByteShift = MaskInfo.second;
17007 SelectionDAG &DAG = DC->getDAG();
17008
17009 // Check to see if IVal is all zeros in the part being masked in by the 'or'
17010 // that uses this. If not, this is not a replacement.
17011 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
17012 ByteShift*8, (ByteShift+NumBytes)*8);
17013 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
17014
17015 // Check that it is legal on the target to do this. It is legal if the new
17016 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
17017 // legalization (and the target doesn't explicitly think this is a bad idea).
17018 MVT VT = MVT::getIntegerVT(NumBytes * 8);
17019 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17020 if (!DC->isTypeLegal(VT))
17021 return SDValue();
17022 if (St->getMemOperand() &&
17023 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
17024 *St->getMemOperand()))
17025 return SDValue();
17026
17027 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
17028 // shifted by ByteShift and truncated down to NumBytes.
17029 if (ByteShift) {
17030 SDLoc DL(IVal);
17031 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
17032 DAG.getConstant(ByteShift*8, DL,
17033 DC->getShiftAmountTy(IVal.getValueType())));
17034 }
17035
17036 // Figure out the offset for the store and the alignment of the access.
17037 unsigned StOffset;
17038 if (DAG.getDataLayout().isLittleEndian())
17040 else
17041 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
17042
17043 SDValue Ptr = St->getBasePtr();
17044 if (StOffset) {
17045 SDLoc DL(IVal);
17047 }
17048
17049 // Truncate down to the new size.
17050 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
17051
17052 ++OpsNarrowed;
17053 return DAG
17054 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
17055 St->getPointerInfo().getWithOffset(StOffset),
17056 St->getOriginalAlign());
17057}
17058
17059/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
17060/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
17061/// narrowing the load and store if it would end up being a win for performance
17062/// or code size.
17063SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
17065 if (!ST->isSimple())
17066 return SDValue();
17067
17068 SDValue Chain = ST->getChain();
17069 SDValue Value = ST->getValue();
17070 SDValue Ptr = ST->getBasePtr();
17071 EVT VT = Value.getValueType();
17072
17073 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
17074 return SDValue();
17075
17076 unsigned Opc = Value.getOpcode();
17077
17078 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
17079 // is a byte mask indicating a consecutive number of bytes, check to see if
17080 // Y is known to provide just those bytes. If so, we try to replace the
17081 // load + replace + store sequence with a single (narrower) store, which makes
17082 // the load dead.
17084 std::pair<unsigned, unsigned> MaskedLoad;
17085 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
17086 if (MaskedLoad.first)
17088 Value.getOperand(1), ST,this))
17089 return NewST;
17090
17091 // Or is commutative, so try swapping X and Y.
17092 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
17093 if (MaskedLoad.first)
17095 Value.getOperand(0), ST,this))
17096 return NewST;
17097 }
17098
17100 return SDValue();
17101
17102 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
17103 Value.getOperand(1).getOpcode() != ISD::Constant)
17104 return SDValue();
17105
17106 SDValue N0 = Value.getOperand(0);
17107 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17108 Chain == SDValue(N0.getNode(), 1)) {
17110 if (LD->getBasePtr() != Ptr ||
17111 LD->getPointerInfo().getAddrSpace() !=
17112 ST->getPointerInfo().getAddrSpace())
17113 return SDValue();
17114
17115 // Find the type to narrow it the load / op / store to.
17116 SDValue N1 = Value.getOperand(1);
17117 unsigned BitWidth = N1.getValueSizeInBits();
17118 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
17119 if (Opc == ISD::AND)
17121 if (Imm == 0 || Imm.isAllOnes())
17122 return SDValue();
17123 unsigned ShAmt = Imm.countTrailingZeros();
17124 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
17125 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
17127 // The narrowing should be profitable, the load/store operation should be
17128 // legal (or custom) and the store size should be equal to the NewVT width.
17129 while (NewBW < BitWidth &&
17130 (NewVT.getStoreSizeInBits() != NewBW ||
17131 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
17132 !TLI.isNarrowingProfitable(VT, NewVT))) {
17135 }
17136 if (NewBW >= BitWidth)
17137 return SDValue();
17138
17139 // If the lsb changed does not start at the type bitwidth boundary,
17140 // start at the previous one.
17141 if (ShAmt % NewBW)
17142 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
17144 std::min(BitWidth, ShAmt + NewBW));
17145 if ((Imm & Mask) == Imm) {
17146 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
17147 if (Opc == ISD::AND)
17149 uint64_t PtrOff = ShAmt / 8;
17150 // For big endian targets, we need to adjust the offset to the pointer to
17151 // load the correct bytes.
17152 if (DAG.getDataLayout().isBigEndian())
17153 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
17154
17155 bool IsFast = false;
17156 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
17157 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
17158 LD->getAddressSpace(), NewAlign,
17159 LD->getMemOperand()->getFlags(), &IsFast) ||
17160 !IsFast)
17161 return SDValue();
17162
17163 SDValue NewPtr =
17165 SDValue NewLD =
17166 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
17167 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
17168 LD->getMemOperand()->getFlags(), LD->getAAInfo());
17169 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
17171 NewVT));
17172 SDValue NewST =
17173 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
17174 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
17175
17176 AddToWorklist(NewPtr.getNode());
17177 AddToWorklist(NewLD.getNode());
17178 AddToWorklist(NewVal.getNode());
17179 WorklistRemover DeadNodes(*this);
17180 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
17181 ++OpsNarrowed;
17182 return NewST;
17183 }
17184 }
17185
17186 return SDValue();
17187}
17188
17189/// For a given floating point load / store pair, if the load value isn't used
17190/// by any other operations, then consider transforming the pair to integer
17191/// load / store operations if the target deems the transformation profitable.
17192SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
17194 SDValue Value = ST->getValue();
17195 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
17196 Value.hasOneUse()) {
17198 EVT VT = LD->getMemoryVT();
17199 if (!VT.isFloatingPoint() ||
17200 VT != ST->getMemoryVT() ||
17201 LD->isNonTemporal() ||
17202 ST->isNonTemporal() ||
17203 LD->getPointerInfo().getAddrSpace() != 0 ||
17204 ST->getPointerInfo().getAddrSpace() != 0)
17205 return SDValue();
17206
17208
17209 // We don't know the size of scalable types at compile time so we cannot
17210 // create an integer of the equivalent size.
17211 if (VTSize.isScalable())
17212 return SDValue();
17213
17214 bool FastLD = false, FastST = false;
17215 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
17216 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
17221 *LD->getMemOperand(), &FastLD) ||
17223 *ST->getMemOperand(), &FastST) ||
17224 !FastLD || !FastST)
17225 return SDValue();
17226
17227 SDValue NewLD =
17228 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
17229 LD->getPointerInfo(), LD->getAlign());
17230
17231 SDValue NewST =
17232 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
17233 ST->getPointerInfo(), ST->getAlign());
17234
17235 AddToWorklist(NewLD.getNode());
17236 AddToWorklist(NewST.getNode());
17237 WorklistRemover DeadNodes(*this);
17238 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
17239 ++LdStFP2Int;
17240 return NewST;
17241 }
17242
17243 return SDValue();
17244}
17245
17246// This is a helper function for visitMUL to check the profitability
17247// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
17248// MulNode is the original multiply, AddNode is (add x, c1),
17249// and ConstNode is c2.
17250//
17251// If the (add x, c1) has multiple uses, we could increase
17252// the number of adds if we make this transformation.
17253// It would only be worth doing this if we can remove a
17254// multiply in the process. Check for that here.
17255// To illustrate:
17256// (A + c1) * c3
17257// (A + c2) * c3
17258// We're checking for cases where we have common "c3 * A" expressions.
17259bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
17261 SDValue &ConstNode) {
17262 APInt Val;
17263
17264 // If the add only has one use, and the target thinks the folding is
17265 // profitable or does not lead to worse code, this would be OK to do.
17266 if (AddNode.getNode()->hasOneUse() &&
17268 return true;
17269
17270 // Walk all the users of the constant with which we're multiplying.
17271 for (SDNode *Use : ConstNode->uses()) {
17272 if (Use == MulNode) // This use is the one we're on right now. Skip it.
17273 continue;
17274
17275 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
17276 SDNode *OtherOp;
17277 SDNode *MulVar = AddNode.getOperand(0).getNode();
17278
17279 // OtherOp is what we're multiplying against the constant.
17280 if (Use->getOperand(0) == ConstNode)
17281 OtherOp = Use->getOperand(1).getNode();
17282 else
17283 OtherOp = Use->getOperand(0).getNode();
17284
17285 // Check to see if multiply is with the same operand of our "add".
17286 //
17287 // ConstNode = CONST
17288 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
17289 // ...
17290 // AddNode = (A + c1) <-- MulVar is A.
17291 // = AddNode * ConstNode <-- current visiting instruction.
17292 //
17293 // If we make this transformation, we will have a common
17294 // multiply (ConstNode * A) that we can save.
17295 if (OtherOp == MulVar)
17296 return true;
17297
17298 // Now check to see if a future expansion will give us a common
17299 // multiply.
17300 //
17301 // ConstNode = CONST
17302 // AddNode = (A + c1)
17303 // ... = AddNode * ConstNode <-- current visiting instruction.
17304 // ...
17305 // OtherOp = (A + c2)
17306 // Use = OtherOp * ConstNode <-- visiting Use.
17307 //
17308 // If we make this transformation, we will have a common
17309 // multiply (CONST * A) after we also do the same transformation
17310 // to the "t2" instruction.
17311 if (OtherOp->getOpcode() == ISD::ADD &&
17313 OtherOp->getOperand(0).getNode() == MulVar)
17314 return true;
17315 }
17316 }
17317
17318 // Didn't find a case where this would be profitable.
17319 return false;
17320}
17321
17322SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
17323 unsigned NumStores) {
17326 SDLoc StoreDL(StoreNodes[0].MemNode);
17327
17328 for (unsigned i = 0; i < NumStores; ++i) {
17329 Visited.insert(StoreNodes[i].MemNode);
17330 }
17331
17332 // don't include nodes that are children or repeated nodes.
17333 for (unsigned i = 0; i < NumStores; ++i) {
17334 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
17335 Chains.push_back(StoreNodes[i].MemNode->getChain());
17336 }
17337
17338 assert(Chains.size() > 0 && "Chain should have generated a chain");
17339 return DAG.getTokenFactor(StoreDL, Chains);
17340}
17341
17342bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
17343 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
17344 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
17345 // Make sure we have something to merge.
17346 if (NumStores < 2)
17347 return false;
17348
17349 assert((!UseTrunc || !UseVector) &&
17350 "This optimization cannot emit a vector truncating store");
17351
17352 // The latest Node in the DAG.
17353 SDLoc DL(StoreNodes[0].MemNode);
17354
17355 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
17356 unsigned SizeInBits = NumStores * ElementSizeBits;
17357 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17358
17360 AAMDNodes AAInfo;
17361 for (unsigned I = 0; I != NumStores; ++I) {
17363 if (!Flags) {
17364 Flags = St->getMemOperand()->getFlags();
17365 AAInfo = St->getAAInfo();
17366 continue;
17367 }
17368 // Skip merging if there's an inconsistent flag.
17369 if (Flags != St->getMemOperand()->getFlags())
17370 return false;
17371 // Concatenate AA metadata.
17372 AAInfo = AAInfo.concat(St->getAAInfo());
17373 }
17374
17375 EVT StoreTy;
17376 if (UseVector) {
17377 unsigned Elts = NumStores * NumMemElts;
17378 // Get the type for the merged vector store.
17379 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17380 } else
17381 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
17382
17384 if (UseVector) {
17385 if (IsConstantSrc) {
17387 for (unsigned I = 0; I != NumStores; ++I) {
17389 SDValue Val = St->getValue();
17390 // If constant is of the wrong type, convert it now.
17391 if (MemVT != Val.getValueType()) {
17392 Val = peekThroughBitcasts(Val);
17393 // Deal with constants of wrong size.
17394 if (ElementSizeBits != Val.getValueSizeInBits()) {
17395 EVT IntMemVT =
17396 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
17397 if (isa<ConstantFPSDNode>(Val)) {
17398 // Not clear how to truncate FP values.
17399 return false;
17400 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
17401 Val = DAG.getConstant(C->getAPIntValue()
17402 .zextOrTrunc(Val.getValueSizeInBits())
17403 .zextOrTrunc(ElementSizeBits),
17404 SDLoc(C), IntMemVT);
17405 }
17406 // Make sure correctly size type is the correct type.
17407 Val = DAG.getBitcast(MemVT, Val);
17408 }
17409 BuildVector.push_back(Val);
17410 }
17411 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17412 : ISD::BUILD_VECTOR,
17414 } else {
17416 for (unsigned i = 0; i < NumStores; ++i) {
17418 SDValue Val = peekThroughBitcasts(St->getValue());
17419 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
17420 // type MemVT. If the underlying value is not the correct
17421 // type, but it is an extraction of an appropriate vector we
17422 // can recast Val to be of the correct type. This may require
17423 // converting between EXTRACT_VECTOR_ELT and
17424 // EXTRACT_SUBVECTOR.
17425 if ((MemVT != Val.getValueType()) &&
17428 EVT MemVTScalarTy = MemVT.getScalarType();
17429 // We may need to add a bitcast here to get types to line up.
17430 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17431 Val = DAG.getBitcast(MemVT, Val);
17432 } else {
17433 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17435 SDValue Vec = Val.getOperand(0);
17436 SDValue Idx = Val.getOperand(1);
17437 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17438 }
17439 }
17440 Ops.push_back(Val);
17441 }
17442
17443 // Build the extracted vector elements back into a vector.
17444 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17445 : ISD::BUILD_VECTOR,
17446 DL, StoreTy, Ops);
17447 }
17448 } else {
17449 // We should always use a vector store when merging extracted vector
17450 // elements, so this path implies a store of constants.
17451 assert(IsConstantSrc && "Merged vector elements should use vector store");
17452
17453 APInt StoreInt(SizeInBits, 0);
17454
17455 // Construct a single integer constant which is made of the smaller
17456 // constant inputs.
17457 bool IsLE = DAG.getDataLayout().isLittleEndian();
17458 for (unsigned i = 0; i < NumStores; ++i) {
17459 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17461
17462 SDValue Val = St->getValue();
17463 Val = peekThroughBitcasts(Val);
17466 StoreInt |= C->getAPIntValue()
17467 .zextOrTrunc(ElementSizeBits)
17468 .zextOrTrunc(SizeInBits);
17469 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17470 StoreInt |= C->getValueAPF()
17471 .bitcastToAPInt()
17472 .zextOrTrunc(ElementSizeBits)
17473 .zextOrTrunc(SizeInBits);
17474 // If fp truncation is necessary give up for now.
17475 if (MemVT.getSizeInBits() != ElementSizeBits)
17476 return false;
17477 } else {
17478 llvm_unreachable("Invalid constant element type");
17479 }
17480 }
17481
17482 // Create the new Load and Store operations.
17484 }
17485
17486 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17488
17489 // make sure we use trunc store if it's necessary to be legal.
17491 if (!UseTrunc) {
17492 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17493 FirstInChain->getPointerInfo(),
17494 FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17495 } else { // Must be realized as a trunc store
17497 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17498 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17501 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17503 NewStore = DAG.getTruncStore(
17504 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17505 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17506 FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17507 }
17508
17509 // Replace all merged stores with the new store.
17510 for (unsigned i = 0; i < NumStores; ++i)
17511 CombineTo(StoreNodes[i].MemNode, NewStore);
17512
17513 AddToWorklist(NewChain.getNode());
17514 return true;
17515}
17516
17517void DAGCombiner::getStoreMergeCandidates(
17519 SDNode *&RootNode) {
17520 // This holds the base pointer, index, and the offset in bytes from the base
17521 // pointer. We must have a base and an offset. Do not handle stores to undef
17522 // base pointers.
17524 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17525 return;
17526
17527 SDValue Val = peekThroughBitcasts(St->getValue());
17528 StoreSource StoreSrc = getStoreSource(Val);
17529 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17530
17531 // Match on loadbaseptr if relevant.
17532 EVT MemVT = St->getMemoryVT();
17534 EVT LoadVT;
17535 if (StoreSrc == StoreSource::Load) {
17536 auto *Ld = cast<LoadSDNode>(Val);
17538 LoadVT = Ld->getMemoryVT();
17539 // Load and store should be the same type.
17540 if (MemVT != LoadVT)
17541 return;
17542 // Loads must only have one use.
17543 if (!Ld->hasNUsesOfValue(1, 0))
17544 return;
17545 // The memory operands must not be volatile/indexed/atomic.
17546 // TODO: May be able to relax for unordered atomics (see D66309)
17547 if (!Ld->isSimple() || Ld->isIndexed())
17548 return;
17549 }
17551 int64_t &Offset) -> bool {
17552 // The memory operands must not be volatile/indexed/atomic.
17553 // TODO: May be able to relax for unordered atomics (see D66309)
17554 if (!Other->isSimple() || Other->isIndexed())
17555 return false;
17556 // Don't mix temporal stores with non-temporal stores.
17557 if (St->isNonTemporal() != Other->isNonTemporal())
17558 return false;
17560 // Allow merging constants of different types as integers.
17561 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17562 : Other->getMemoryVT() != MemVT;
17563 switch (StoreSrc) {
17564 case StoreSource::Load: {
17565 if (NoTypeMatch)
17566 return false;
17567 // The Load's Base Ptr must also match.
17569 if (!OtherLd)
17570 return false;
17572 if (LoadVT != OtherLd->getMemoryVT())
17573 return false;
17574 // Loads must only have one use.
17575 if (!OtherLd->hasNUsesOfValue(1, 0))
17576 return false;
17577 // The memory operands must not be volatile/indexed/atomic.
17578 // TODO: May be able to relax for unordered atomics (see D66309)
17579 if (!OtherLd->isSimple() || OtherLd->isIndexed())
17580 return false;
17581 // Don't mix temporal loads with non-temporal loads.
17582 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17583 return false;
17584 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17585 return false;
17586 break;
17587 }
17588 case StoreSource::Constant:
17589 if (NoTypeMatch)
17590 return false;
17592 return false;
17593 break;
17594 case StoreSource::Extract:
17595 // Do not merge truncated stores here.
17596 if (Other->isTruncatingStore())
17597 return false;
17598 if (!MemVT.bitsEq(OtherBC.getValueType()))
17599 return false;
17600 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17601 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17602 return false;
17603 break;
17604 default:
17605 llvm_unreachable("Unhandled store source for merging");
17606 }
17607 Ptr = BaseIndexOffset::match(Other, DAG);
17608 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17609 };
17610
17611 // Check if the pair of StoreNode and the RootNode already bail out many
17612 // times which is over the limit in dependence check.
17614 SDNode *RootNode) -> bool {
17615 auto RootCount = StoreRootCountMap.find(StoreNode);
17616 return RootCount != StoreRootCountMap.end() &&
17617 RootCount->second.first == RootNode &&
17618 RootCount->second.second > StoreMergeDependenceLimit;
17619 };
17620
17622 // This must be a chain use.
17623 if (UseIter.getOperandNo() != 0)
17624 return;
17626 BaseIndexOffset Ptr;
17627 int64_t PtrDiff;
17628 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17630 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17631 }
17632 };
17633
17634 // We looking for a root node which is an ancestor to all mergable
17635 // stores. We search up through a load, to our root and then down
17636 // through all children. For instance we will find Store{1,2,3} if
17637 // St is Store1, Store2. or Store3 where the root is not a load
17638 // which always true for nonvolatile ops. TODO: Expand
17639 // the search to find all valid candidates through multiple layers of loads.
17640 //
17641 // Root
17642 // |-------|-------|
17643 // Load Load Store3
17644 // | |
17645 // Store1 Store2
17646 //
17647 // FIXME: We should be able to climb and
17648 // descend TokenFactors to find candidates as well.
17649
17650 RootNode = St->getChain().getNode();
17651
17652 unsigned NumNodesExplored = 0;
17653 const unsigned MaxSearchNodes = 1024;
17654 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17655 RootNode = Ldn->getChain().getNode();
17656 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17658 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17659 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17661 }
17662 // Check stores that depend on the root (e.g. Store 3 in the chart above).
17663 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
17665 }
17666 }
17667 } else {
17668 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17671 }
17672}
17673
17674// We need to check that merging these stores does not cause a loop in
17675// the DAG. Any store candidate may depend on another candidate
17676// indirectly through its operand (we already consider dependencies
17677// through the chain). Check in parallel by searching up from
17678// non-chain operands of candidates.
17679bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17680 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17681 SDNode *RootNode) {
17682 // FIXME: We should be able to truncate a full search of
17683 // predecessors by doing a BFS and keeping tabs the originating
17684 // stores from which worklist nodes come from in a similar way to
17685 // TokenFactor simplfication.
17686
17689
17690 // RootNode is a predecessor to all candidates so we need not search
17691 // past it. Add RootNode (peeking through TokenFactors). Do not count
17692 // these towards size check.
17693
17694 Worklist.push_back(RootNode);
17695 while (!Worklist.empty()) {
17696 auto N = Worklist.pop_back_val();
17697 if (!Visited.insert(N).second)
17698 continue; // Already present in Visited.
17699 if (N->getOpcode() == ISD::TokenFactor) {
17700 for (SDValue Op : N->ops())
17701 Worklist.push_back(Op.getNode());
17702 }
17703 }
17704
17705 // Don't count pruning nodes towards max.
17706 unsigned int Max = 1024 + Visited.size();
17707 // Search Ops of store candidates.
17708 for (unsigned i = 0; i < NumStores; ++i) {
17709 SDNode *N = StoreNodes[i].MemNode;
17710 // Of the 4 Store Operands:
17711 // * Chain (Op 0) -> We have already considered these
17712 // in candidate selection and can be
17713 // safely ignored
17714 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
17715 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17716 // but aren't necessarily fromt the same base node, so
17717 // cycles possible (e.g. via indexed store).
17718 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17719 // non-indexed stores). Not constant on all targets (e.g. ARM)
17720 // and so can participate in a cycle.
17721 for (unsigned j = 1; j < N->getNumOperands(); ++j)
17722 Worklist.push_back(N->getOperand(j).getNode());
17723 }
17724 // Search through DAG. We can stop early if we find a store node.
17725 for (unsigned i = 0; i < NumStores; ++i)
17726 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17727 Max)) {
17728 // If the searching bail out, record the StoreNode and RootNode in the
17729 // StoreRootCountMap. If we have seen the pair many times over a limit,
17730 // we won't add the StoreNode into StoreNodes set again.
17731 if (Visited.size() >= Max) {
17732 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17733 if (RootCount.first == RootNode)
17734 RootCount.second++;
17735 else
17736 RootCount = {RootNode, 1};
17737 }
17738 return false;
17739 }
17740 return true;
17741}
17742
17743unsigned
17744DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17745 int64_t ElementSizeBytes) const {
17746 while (true) {
17747 // Find a store past the width of the first store.
17748 size_t StartIdx = 0;
17749 while ((StartIdx + 1 < StoreNodes.size()) &&
17750 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17751 StoreNodes[StartIdx + 1].OffsetFromBase)
17752 ++StartIdx;
17753
17754 // Bail if we don't have enough candidates to merge.
17755 if (StartIdx + 1 >= StoreNodes.size())
17756 return 0;
17757
17758 // Trim stores that overlapped with the first store.
17759 if (StartIdx)
17760 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17761
17762 // Scan the memory operations on the chain and find the first
17763 // non-consecutive store memory address.
17764 unsigned NumConsecutiveStores = 1;
17765 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17766 // Check that the addresses are consecutive starting from the second
17767 // element in the list of stores.
17768 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17769 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17770 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17771 break;
17772 NumConsecutiveStores = i + 1;
17773 }
17774 if (NumConsecutiveStores > 1)
17775 return NumConsecutiveStores;
17776
17777 // There are no consecutive stores at the start of the list.
17778 // Remove the first store and try again.
17779 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17780 }
17781}
17782
17783bool DAGCombiner::tryStoreMergeOfConstants(
17785 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17786 LLVMContext &Context = *DAG.getContext();
17787 const DataLayout &DL = DAG.getDataLayout();
17788 int64_t ElementSizeBytes = MemVT.getStoreSize();
17789 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17790 bool MadeChange = false;
17791
17792 // Store the constants into memory as one consecutive store.
17793 while (NumConsecutiveStores >= 2) {
17794 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17795 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17796 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17797 unsigned LastLegalType = 1;
17798 unsigned LastLegalVectorType = 1;
17799 bool LastIntegerTrunc = false;
17800 bool NonZero = false;
17802 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17804 SDValue StoredVal = ST->getValue();
17805 bool IsElementZero = false;
17807 IsElementZero = C->isZero();
17809 IsElementZero = C->getConstantFPValue()->isNullValue();
17810 if (IsElementZero) {
17813 }
17814 NonZero |= !IsElementZero;
17815
17816 // Find a legal type for the constant store.
17817 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17818 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17819 bool IsFast = false;
17820
17821 // Break early when size is too large to be legal.
17822 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17823 break;
17824
17825 if (TLI.isTypeLegal(StoreTy) &&
17827 DAG.getMachineFunction()) &&
17828 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17829 *FirstInChain->getMemOperand(), &IsFast) &&
17830 IsFast) {
17831 LastIntegerTrunc = false;
17832 LastLegalType = i + 1;
17833 // Or check whether a truncstore is legal.
17834 } else if (TLI.getTypeAction(Context, StoreTy) ==
17837 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17840 DAG.getMachineFunction()) &&
17841 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17842 *FirstInChain->getMemOperand(), &IsFast) &&
17843 IsFast) {
17844 LastIntegerTrunc = true;
17845 LastLegalType = i + 1;
17846 }
17847 }
17848
17849 // We only use vectors if the constant is known to be zero or the
17850 // target allows it and the function is not marked with the
17851 // noimplicitfloat attribute.
17852 if ((!NonZero ||
17854 AllowVectors) {
17855 // Find a legal type for the vector store.
17856 unsigned Elts = (i + 1) * NumMemElts;
17857 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17858 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17860 TLI.allowsMemoryAccess(Context, DL, Ty,
17861 *FirstInChain->getMemOperand(), &IsFast) &&
17862 IsFast)
17863 LastLegalVectorType = i + 1;
17864 }
17865 }
17866
17867 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17868 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17869 bool UseTrunc = LastIntegerTrunc && !UseVector;
17870
17871 // Check if we found a legal integer type that creates a meaningful
17872 // merge.
17873 if (NumElem < 2) {
17874 // We know that candidate stores are in order and of correct
17875 // shape. While there is no mergeable sequence from the
17876 // beginning one may start later in the sequence. The only
17877 // reason a merge of size N could have failed where another of
17878 // the same size would not have, is if the alignment has
17879 // improved or we've dropped a non-zero value. Drop as many
17880 // candidates as we can here.
17881 unsigned NumSkip = 1;
17882 while ((NumSkip < NumConsecutiveStores) &&
17884 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17885 NumSkip++;
17886
17887 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17889 continue;
17890 }
17891
17892 // Check that we can merge these candidates without causing a cycle.
17894 RootNode)) {
17895 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17897 continue;
17898 }
17899
17901 /*IsConstantSrc*/ true,
17902 UseVector, UseTrunc);
17903
17904 // Remove merged stores for next iteration.
17905 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17907 }
17908 return MadeChange;
17909}
17910
17911bool DAGCombiner::tryStoreMergeOfExtracts(
17913 EVT MemVT, SDNode *RootNode) {
17914 LLVMContext &Context = *DAG.getContext();
17915 const DataLayout &DL = DAG.getDataLayout();
17916 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17917 bool MadeChange = false;
17918
17919 // Loop on Consecutive Stores on success.
17920 while (NumConsecutiveStores >= 2) {
17921 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17922 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17923 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17924 unsigned NumStoresToMerge = 1;
17925 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17926 // Find a legal type for the vector store.
17927 unsigned Elts = (i + 1) * NumMemElts;
17928 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17929 bool IsFast = false;
17930
17931 // Break early when size is too large to be legal.
17932 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17933 break;
17934
17935 if (TLI.isTypeLegal(Ty) &&
17937 TLI.allowsMemoryAccess(Context, DL, Ty,
17938 *FirstInChain->getMemOperand(), &IsFast) &&
17939 IsFast)
17940 NumStoresToMerge = i + 1;
17941 }
17942
17943 // Check if we found a legal integer type creating a meaningful
17944 // merge.
17945 if (NumStoresToMerge < 2) {
17946 // We know that candidate stores are in order and of correct
17947 // shape. While there is no mergeable sequence from the
17948 // beginning one may start later in the sequence. The only
17949 // reason a merge of size N could have failed where another of
17950 // the same size would not have, is if the alignment has
17951 // improved. Drop as many candidates as we can here.
17952 unsigned NumSkip = 1;
17953 while ((NumSkip < NumConsecutiveStores) &&
17954 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17955 NumSkip++;
17956
17957 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17959 continue;
17960 }
17961
17962 // Check that we can merge these candidates without causing a cycle.
17964 RootNode)) {
17965 StoreNodes.erase(StoreNodes.begin(),
17966 StoreNodes.begin() + NumStoresToMerge);
17968 continue;
17969 }
17970
17971 MadeChange |= mergeStoresOfConstantsOrVecElts(
17972 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17973 /*UseVector*/ true, /*UseTrunc*/ false);
17974
17975 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17977 }
17978 return MadeChange;
17979}
17980
17981bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17982 unsigned NumConsecutiveStores, EVT MemVT,
17983 SDNode *RootNode, bool AllowVectors,
17984 bool IsNonTemporalStore,
17985 bool IsNonTemporalLoad) {
17986 LLVMContext &Context = *DAG.getContext();
17987 const DataLayout &DL = DAG.getDataLayout();
17988 int64_t ElementSizeBytes = MemVT.getStoreSize();
17989 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17990 bool MadeChange = false;
17991
17992 // Look for load nodes which are used by the stored values.
17994
17995 // Find acceptable loads. Loads need to have the same chain (token factor),
17996 // must not be zext, volatile, indexed, and they must be consecutive.
17998
17999 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18001 SDValue Val = peekThroughBitcasts(St->getValue());
18003
18005 // If this is not the first ptr that we check.
18006 int64_t LdOffset = 0;
18007 if (LdBasePtr.getBase().getNode()) {
18008 // The base ptr must be the same.
18009 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
18010 break;
18011 } else {
18012 // Check that all other base pointers are the same as this one.
18013 LdBasePtr = LdPtr;
18014 }
18015
18016 // We found a potential memory operand to merge.
18017 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
18018 }
18019
18020 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
18022 bool NeedRotate = false;
18023 if (LoadNodes.size() == 2) {
18024 // If we have load/store pair instructions and we only have two values,
18025 // don't bother merging.
18027 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
18028 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
18029 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
18030 break;
18031 }
18032 // If the loads are reversed, see if we can rotate the halves into place.
18033 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
18034 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
18035 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
18036 if (Offset0 - Offset1 == ElementSizeBytes &&
18040 NeedRotate = true;
18041 }
18042 }
18043 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18044 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18045 Align FirstStoreAlign = FirstInChain->getAlign();
18047
18048 // Scan the memory operations on the chain and find the first
18049 // non-consecutive load memory address. These variables hold the index in
18050 // the store node array.
18051
18052 unsigned LastConsecutiveLoad = 1;
18053
18054 // This variable refers to the size and not index in the array.
18055 unsigned LastLegalVectorType = 1;
18056 unsigned LastLegalIntegerType = 1;
18057 bool isDereferenceable = true;
18058 bool DoIntegerTruncate = false;
18059 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
18060 SDValue LoadChain = FirstLoad->getChain();
18061 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
18062 // All loads must share the same chain.
18063 if (LoadNodes[i].MemNode->getChain() != LoadChain)
18064 break;
18065
18066 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
18067 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18068 break;
18070
18071 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
18072 isDereferenceable = false;
18073
18074 // Find a legal type for the vector store.
18075 unsigned Elts = (i + 1) * NumMemElts;
18076 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18077
18078 // Break early when size is too large to be legal.
18079 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
18080 break;
18081
18082 bool IsFastSt = false;
18083 bool IsFastLd = false;
18084 // Don't try vector types if we need a rotate. We may still fail the
18085 // legality checks for the integer type, but we can't handle the rotate
18086 // case with vectors.
18087 // FIXME: We could use a shuffle in place of the rotate.
18088 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
18090 DAG.getMachineFunction()) &&
18091 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18092 *FirstInChain->getMemOperand(), &IsFastSt) &&
18093 IsFastSt &&
18094 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18095 *FirstLoad->getMemOperand(), &IsFastLd) &&
18096 IsFastLd) {
18097 LastLegalVectorType = i + 1;
18098 }
18099
18100 // Find a legal type for the integer store.
18101 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18102 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18103 if (TLI.isTypeLegal(StoreTy) &&
18105 DAG.getMachineFunction()) &&
18106 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18107 *FirstInChain->getMemOperand(), &IsFastSt) &&
18108 IsFastSt &&
18109 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18110 *FirstLoad->getMemOperand(), &IsFastLd) &&
18111 IsFastLd) {
18112 LastLegalIntegerType = i + 1;
18113 DoIntegerTruncate = false;
18114 // Or check whether a truncstore and extload is legal.
18115 } else if (TLI.getTypeAction(Context, StoreTy) ==
18120 DAG.getMachineFunction()) &&
18124 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18125 *FirstInChain->getMemOperand(), &IsFastSt) &&
18126 IsFastSt &&
18127 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18128 *FirstLoad->getMemOperand(), &IsFastLd) &&
18129 IsFastLd) {
18130 LastLegalIntegerType = i + 1;
18131 DoIntegerTruncate = true;
18132 }
18133 }
18134 }
18135
18136 // Only use vector types if the vector type is larger than the integer
18137 // type. If they are the same, use integers.
18138 bool UseVectorTy =
18140 unsigned LastLegalType =
18142
18143 // We add +1 here because the LastXXX variables refer to location while
18144 // the NumElem refers to array/index size.
18145 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
18146 NumElem = std::min(LastLegalType, NumElem);
18147 Align FirstLoadAlign = FirstLoad->getAlign();
18148
18149 if (NumElem < 2) {
18150 // We know that candidate stores are in order and of correct
18151 // shape. While there is no mergeable sequence from the
18152 // beginning one may start later in the sequence. The only
18153 // reason a merge of size N could have failed where another of
18154 // the same size would not have is if the alignment or either
18155 // the load or store has improved. Drop as many candidates as we
18156 // can here.
18157 unsigned NumSkip = 1;
18158 while ((NumSkip < LoadNodes.size()) &&
18159 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
18160 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18161 NumSkip++;
18162 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18163 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
18165 continue;
18166 }
18167
18168 // Check that we can merge these candidates without causing a cycle.
18170 RootNode)) {
18171 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18172 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18174 continue;
18175 }
18176
18177 // Find if it is better to use vectors or integers to load and store
18178 // to memory.
18180 if (UseVectorTy) {
18181 // Find a legal type for the vector store.
18182 unsigned Elts = NumElem * NumMemElts;
18183 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18184 } else {
18185 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
18186 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
18187 }
18188
18189 SDLoc LoadDL(LoadNodes[0].MemNode);
18190 SDLoc StoreDL(StoreNodes[0].MemNode);
18191
18192 // The merged loads are required to have the same incoming chain, so
18193 // using the first's chain is acceptable.
18194
18196 AddToWorklist(NewStoreChain.getNode());
18197
18199 isDereferenceable ? MachineMemOperand::MODereferenceable
18203
18207
18210 NewLoad = DAG.getLoad(
18211 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
18212 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
18213 SDValue StoreOp = NewLoad;
18214 if (NeedRotate) {
18215 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
18217 "Unexpected type for rotate-able load pair");
18218 SDValue RotAmt =
18220 // Target can convert to the identical ROTR if it does not have ROTL.
18222 }
18223 NewStore = DAG.getStore(
18224 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
18225 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
18226 } else { // This must be the truncstore/extload case
18227 EVT ExtendedTy =
18230 FirstLoad->getChain(), FirstLoad->getBasePtr(),
18231 FirstLoad->getPointerInfo(), JointMemOpVT,
18233 NewStore = DAG.getTruncStore(
18234 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
18235 FirstInChain->getPointerInfo(), JointMemOpVT,
18236 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
18237 }
18238
18239 // Transfer chain users from old loads to the new load.
18240 for (unsigned i = 0; i < NumElem; ++i) {
18241 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
18243 SDValue(NewLoad.getNode(), 1));
18244 }
18245
18246 // Replace all stores with the new store. Recursively remove corresponding
18247 // values if they are no longer used.
18248 for (unsigned i = 0; i < NumElem; ++i) {
18249 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
18250 CombineTo(StoreNodes[i].MemNode, NewStore);
18251 if (Val.getNode()->use_empty())
18252 recursivelyDeleteUnusedNodes(Val.getNode());
18253 }
18254
18255 MadeChange = true;
18256 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18257 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18259 }
18260 return MadeChange;
18261}
18262
18263bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
18264 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
18265 return false;
18266
18267 // TODO: Extend this function to merge stores of scalable vectors.
18268 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
18269 // store since we know <vscale x 16 x i8> is exactly twice as large as
18270 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
18271 EVT MemVT = St->getMemoryVT();
18272 if (MemVT.isScalableVector())
18273 return false;
18274 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
18275 return false;
18276
18277 // This function cannot currently deal with non-byte-sized memory sizes.
18278 int64_t ElementSizeBytes = MemVT.getStoreSize();
18279 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
18280 return false;
18281
18282 // Do not bother looking at stored values that are not constants, loads, or
18283 // extracted vector elements.
18284 SDValue StoredVal = peekThroughBitcasts(St->getValue());
18285 const StoreSource StoreSrc = getStoreSource(StoredVal);
18286 if (StoreSrc == StoreSource::Unknown)
18287 return false;
18288
18290 SDNode *RootNode;
18291 // Find potential store merge candidates by searching through chain sub-DAG
18293
18294 // Check if there is anything to merge.
18295 if (StoreNodes.size() < 2)
18296 return false;
18297
18298 // Sort the memory operands according to their distance from the
18299 // base pointer.
18300 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
18301 return LHS.OffsetFromBase < RHS.OffsetFromBase;
18302 });
18303
18305 Attribute::NoImplicitFloat);
18306 bool IsNonTemporalStore = St->isNonTemporal();
18307 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
18308 cast<LoadSDNode>(StoredVal)->isNonTemporal();
18309
18310 // Store Merge attempts to merge the lowest stores. This generally
18311 // works out as if successful, as the remaining stores are checked
18312 // after the first collection of stores is merged. However, in the
18313 // case that a non-mergeable store is found first, e.g., {p[-2],
18314 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
18315 // mergeable cases. To prevent this, we prune such stores from the
18316 // front of StoreNodes here.
18317 bool MadeChange = false;
18318 while (StoreNodes.size() > 1) {
18319 unsigned NumConsecutiveStores =
18321 // There are no more stores in the list to examine.
18322 if (NumConsecutiveStores == 0)
18323 return MadeChange;
18324
18325 // We have at least 2 consecutive stores. Try to merge them.
18326 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
18327 switch (StoreSrc) {
18328 case StoreSource::Constant:
18330 MemVT, RootNode, AllowVectors);
18331 break;
18332
18333 case StoreSource::Extract:
18335 MemVT, RootNode);
18336 break;
18337
18338 case StoreSource::Load:
18340 MemVT, RootNode, AllowVectors,
18342 break;
18343
18344 default:
18345 llvm_unreachable("Unhandled store source type");
18346 }
18347 }
18348 return MadeChange;
18349}
18350
18351SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
18352 SDLoc SL(ST);
18354
18355 // Replace the chain to avoid dependency.
18356 if (ST->isTruncatingStore()) {
18357 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
18358 ST->getBasePtr(), ST->getMemoryVT(),
18359 ST->getMemOperand());
18360 } else {
18361 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
18362 ST->getMemOperand());
18363 }
18364
18365 // Create token to keep both nodes around.
18366 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
18367 MVT::Other, ST->getChain(), ReplStore);
18368
18369 // Make sure the new and old chains are cleaned up.
18370 AddToWorklist(Token.getNode());
18371
18372 // Don't add users to work list.
18373 return CombineTo(ST, Token, false);
18374}
18375
18376SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
18377 SDValue Value = ST->getValue();
18378 if (Value.getOpcode() == ISD::TargetConstantFP)
18379 return SDValue();
18380
18381 if (!ISD::isNormalStore(ST))
18382 return SDValue();
18383
18384 SDLoc DL(ST);
18385
18386 SDValue Chain = ST->getChain();
18387 SDValue Ptr = ST->getBasePtr();
18388
18390
18391 // NOTE: If the original store is volatile, this transform must not increase
18392 // the number of stores. For example, on x86-32 an f64 can be stored in one
18393 // processor operation but an i64 (which is not legal) requires two. So the
18394 // transform should not be done in this case.
18395
18396 SDValue Tmp;
18397 switch (CFP->getSimpleValueType(0).SimpleTy) {
18398 default:
18399 llvm_unreachable("Unknown FP type");
18400 case MVT::f16: // We don't do this for these yet.
18401 case MVT::f80:
18402 case MVT::f128:
18403 case MVT::ppcf128:
18404 return SDValue();
18405 case MVT::f32:
18406 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
18408 ;
18409 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18410 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18411 MVT::i32);
18412 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18413 }
18414
18415 return SDValue();
18416 case MVT::f64:
18417 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18418 ST->isSimple()) ||
18420 ;
18421 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18422 getZExtValue(), SDLoc(CFP), MVT::i64);
18423 return DAG.getStore(Chain, DL, Tmp,
18424 Ptr, ST->getMemOperand());
18425 }
18426
18427 if (ST->isSimple() &&
18429 // Many FP stores are not made apparent until after legalize, e.g. for
18430 // argument passing. Since this is so common, custom legalize the
18431 // 64-bit integer store into two 32-bit stores.
18433 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18434 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18435 if (DAG.getDataLayout().isBigEndian())
18436 std::swap(Lo, Hi);
18437
18438 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18439 AAMDNodes AAInfo = ST->getAAInfo();
18440
18441 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18442 ST->getOriginalAlign(), MMOFlags, AAInfo);
18443 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18444 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18445 ST->getPointerInfo().getWithOffset(4),
18446 ST->getOriginalAlign(), MMOFlags, AAInfo);
18448 St0, St1);
18449 }
18450
18451 return SDValue();
18452 }
18453}
18454
18455SDValue DAGCombiner::visitSTORE(SDNode *N) {
18457 SDValue Chain = ST->getChain();
18458 SDValue Value = ST->getValue();
18459 SDValue Ptr = ST->getBasePtr();
18460
18461 // If this is a store of a bit convert, store the input value if the
18462 // resultant store does not need a higher alignment than the original.
18463 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18464 ST->isUnindexed()) {
18465 EVT SVT = Value.getOperand(0).getValueType();
18466 // If the store is volatile, we only want to change the store type if the
18467 // resulting store is legal. Otherwise we might increase the number of
18468 // memory accesses. We don't care if the original type was legal or not
18469 // as we assume software couldn't rely on the number of accesses of an
18470 // illegal type.
18471 // TODO: May be able to relax for unordered atomics (see D66309)
18472 if (((!LegalOperations && ST->isSimple()) ||
18474 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18475 DAG, *ST->getMemOperand())) {
18476 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18477 ST->getMemOperand());
18478 }
18479 }
18480
18481 // Turn 'store undef, Ptr' -> nothing.
18482 if (Value.isUndef() && ST->isUnindexed())
18483 return Chain;
18484
18485 // Try to infer better alignment information than the store already has.
18486 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18487 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18488 if (*Alignment > ST->getAlign() &&
18489 isAligned(*Alignment, ST->getSrcValueOffset())) {
18491 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18492 ST->getMemoryVT(), *Alignment,
18493 ST->getMemOperand()->getFlags(), ST->getAAInfo());
18494 // NewStore will always be N as we are only refining the alignment
18495 assert(NewStore.getNode() == N);
18496 (void)NewStore;
18497 }
18498 }
18499 }
18500
18501 // Try transforming a pair floating point load / store ops to integer
18502 // load / store ops.
18504 return NewST;
18505
18506 // Try transforming several stores into STORE (BSWAP).
18507 if (SDValue Store = mergeTruncStores(ST))
18508 return Store;
18509
18510 if (ST->isUnindexed()) {
18511 // Walk up chain skipping non-aliasing memory nodes, on this store and any
18512 // adjacent stores.
18513 if (findBetterNeighborChains(ST)) {
18514 // replaceStoreChain uses CombineTo, which handled all of the worklist
18515 // manipulation. Return the original node to not do anything else.
18516 return SDValue(ST, 0);
18517 }
18518 Chain = ST->getChain();
18519 }
18520
18521 // FIXME: is there such a thing as a truncating indexed store?
18522 if (ST->isTruncatingStore() && ST->isUnindexed() &&
18523 Value.getValueType().isInteger() &&
18525 !cast<ConstantSDNode>(Value)->isOpaque())) {
18526 // Convert a truncating store of a extension into a standard store.
18527 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
18528 Value.getOpcode() == ISD::SIGN_EXTEND ||
18529 Value.getOpcode() == ISD::ANY_EXTEND) &&
18530 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
18531 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
18532 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18533 ST->getMemOperand());
18534
18536 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18537 ST->getMemoryVT().getScalarSizeInBits());
18538
18539 // See if we can simplify the input to this truncstore with knowledge that
18540 // only the low bits are being used. For example:
18541 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
18542 AddToWorklist(Value.getNode());
18544 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18545 ST->getMemOperand());
18546
18547 // Otherwise, see if we can simplify the operation with
18548 // SimplifyDemandedBits, which only works if the value has a single use.
18550 // Re-visit the store if anything changed and the store hasn't been merged
18551 // with another node (N is deleted) SimplifyDemandedBits will add Value's
18552 // node back to the worklist if necessary, but we also need to re-visit
18553 // the Store node itself.
18554 if (N->getOpcode() != ISD::DELETED_NODE)
18555 AddToWorklist(N);
18556 return SDValue(N, 0);
18557 }
18558 }
18559
18560 // If this is a load followed by a store to the same location, then the store
18561 // is dead/noop.
18562 // TODO: Can relax for unordered atomics (see D66309)
18564 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18565 ST->isUnindexed() && ST->isSimple() &&
18566 Ld->getAddressSpace() == ST->getAddressSpace() &&
18567 // There can't be any side effects between the load and store, such as
18568 // a call or store.
18569 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18570 // The store is dead, remove it.
18571 return Chain;
18572 }
18573 }
18574
18575 // TODO: Can relax for unordered atomics (see D66309)
18576 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18577 if (ST->isUnindexed() && ST->isSimple() &&
18578 ST1->isUnindexed() && ST1->isSimple()) {
18579 if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
18580 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
18581 ST->getAddressSpace() == ST1->getAddressSpace()) {
18582 // If this is a store followed by a store with the same value to the
18583 // same location, then the store is dead/noop.
18584 return Chain;
18585 }
18586
18587 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18588 !ST1->getBasePtr().isUndef() &&
18589 // BaseIndexOffset and the code below requires knowing the size
18590 // of a vector, so bail out if MemoryVT is scalable.
18591 !ST->getMemoryVT().isScalableVector() &&
18592 !ST1->getMemoryVT().isScalableVector() &&
18593 ST->getAddressSpace() == ST1->getAddressSpace()) {
18596 unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18597 unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
18598 // If this is a store who's preceding store to a subset of the current
18599 // location and no one other node is chained to that store we can
18600 // effectively drop the store. Do not remove stores to undef as they may
18601 // be used as data sinks.
18602 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18603 CombineTo(ST1, ST1->getChain());
18604 return SDValue();
18605 }
18606 }
18607 }
18608 }
18609
18610 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18611 // truncating store. We can do this even if this is already a truncstore.
18612 if ((Value.getOpcode() == ISD::FP_ROUND ||
18613 Value.getOpcode() == ISD::TRUNCATE) &&
18614 Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18615 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18616 ST->getMemoryVT(), LegalOperations)) {
18617 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18618 Ptr, ST->getMemoryVT(), ST->getMemOperand());
18619 }
18620
18621 // Always perform this optimization before types are legal. If the target
18622 // prefers, also try this after legalization to catch stores that were created
18623 // by intrinsics or other nodes.
18624 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18625 while (true) {
18626 // There can be multiple store sequences on the same chain.
18627 // Keep trying to merge store sequences until we are unable to do so
18628 // or until we merge the last store on the chain.
18630 if (!Changed) break;
18631 // Return N as merge only uses CombineTo and no worklist clean
18632 // up is necessary.
18633 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18634 return SDValue(N, 0);
18635 }
18636 }
18637
18638 // Try transforming N to an indexed store.
18640 return SDValue(N, 0);
18641
18642 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
18643 //
18644 // Make sure to do this only after attempting to merge stores in order to
18645 // avoid changing the types of some subset of stores due to visit order,
18646 // preventing their merging.
18647 if (isa<ConstantFPSDNode>(ST->getValue())) {
18649 return NewSt;
18650 }
18651
18653 return NewSt;
18654
18655 return ReduceLoadOpStoreWidth(N);
18656}
18657
18658SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18659 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18660 if (!LifetimeEnd->hasOffset())
18661 return SDValue();
18662
18663 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18664 LifetimeEnd->getOffset(), false);
18665
18666 // We walk up the chains to find stores.
18667 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18668 while (!Chains.empty()) {
18669 SDValue Chain = Chains.pop_back_val();
18670 if (!Chain.hasOneUse())
18671 continue;
18672 switch (Chain.getOpcode()) {
18673 case ISD::TokenFactor:
18674 for (unsigned Nops = Chain.getNumOperands(); Nops;)
18675 Chains.push_back(Chain.getOperand(--Nops));
18676 break;
18678 case ISD::LIFETIME_END:
18679 // We can forward past any lifetime start/end that can be proven not to
18680 // alias the node.
18681 if (!mayAlias(Chain.getNode(), N))
18682 Chains.push_back(Chain.getOperand(0));
18683 break;
18684 case ISD::STORE: {
18686 // TODO: Can relax for unordered atomics (see D66309)
18687 if (!ST->isSimple() || ST->isIndexed())
18688 continue;
18689 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18690 // The bounds of a scalable store are not known until runtime, so this
18691 // store cannot be elided.
18692 if (StoreSize.isScalable())
18693 continue;
18695 // If we store purely within object bounds just before its lifetime ends,
18696 // we can remove the store.
18697 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18698 StoreSize.getFixedSize() * 8)) {
18699 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18700 dbgs() << "\nwithin LIFETIME_END of : ";
18701 LifetimeEndBase.dump(); dbgs() << "\n");
18702 CombineTo(ST, ST->getChain());
18703 return SDValue(N, 0);
18704 }
18705 }
18706 }
18707 }
18708 return SDValue();
18709}
18710
18711/// For the instruction sequence of store below, F and I values
18712/// are bundled together as an i64 value before being stored into memory.
 18713 /// Sometimes it is more efficient to generate separate stores for F and I,
18714/// which can remove the bitwise instructions or sink them to colder places.
18715///
18716/// (store (or (zext (bitcast F to i32) to i64),
18717/// (shl (zext I to i64), 32)), addr) -->
18718/// (store F, addr) and (store I, addr+4)
18719///
18720/// Similarly, splitting for other merged store can also be beneficial, like:
18721/// For pair of {i32, i32}, i64 store --> two i32 stores.
18722/// For pair of {i32, i16}, i64 store --> two i32 stores.
18723/// For pair of {i16, i16}, i32 store --> two i16 stores.
18724/// For pair of {i16, i8}, i32 store --> two i16 stores.
18725/// For pair of {i8, i8}, i16 store --> two i8 stores.
18726///
18727/// We allow each target to determine specifically which kind of splitting is
18728/// supported.
18729///
18730/// The store patterns are commonly seen from the simple code snippet below
18731/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
18732/// void goo(const std::pair<int, float> &);
18733/// hoo() {
18734/// ...
18735/// goo(std::make_pair(tmp, ftmp));
18736/// ...
18737/// }
18738///
18739SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18740 if (OptLevel == CodeGenOpt::None)
18741 return SDValue();
18742
18743 // Can't change the number of memory accesses for a volatile store or break
18744 // atomicity for an atomic one.
18745 if (!ST->isSimple())
18746 return SDValue();
18747
18748 SDValue Val = ST->getValue();
18749 SDLoc DL(ST);
18750
18751 // Match OR operand.
18752 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18753 return SDValue();
18754
18755 // Match SHL operand and get Lower and Higher parts of Val.
18756 SDValue Op1 = Val.getOperand(0);
18757 SDValue Op2 = Val.getOperand(1);
18758 SDValue Lo, Hi;
18759 if (Op1.getOpcode() != ISD::SHL) {
18760 std::swap(Op1, Op2);
18761 if (Op1.getOpcode() != ISD::SHL)
18762 return SDValue();
18763 }
18764 Lo = Op2;
18765 Hi = Op1.getOperand(0);
18766 if (!Op1.hasOneUse())
18767 return SDValue();
18768
18769 // Match shift amount to HalfValBitSize.
18770 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18772 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18773 return SDValue();
18774
18775 // Lo and Hi are zero-extended from int with size less equal than 32
18776 // to i64.
18777 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18778 !Lo.getOperand(0).getValueType().isScalarInteger() ||
18779 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18780 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18781 !Hi.getOperand(0).getValueType().isScalarInteger() ||
18782 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18783 return SDValue();
18784
18785 // Use the EVT of low and high parts before bitcast as the input
18786 // of target query.
18787 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18788 ? Lo.getOperand(0).getValueType()
18789 : Lo.getValueType();
18790 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18791 ? Hi.getOperand(0).getValueType()
18792 : Hi.getValueType();
18794 return SDValue();
18795
18796 // Start to split store.
18797 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18798 AAMDNodes AAInfo = ST->getAAInfo();
18799
18800 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18802 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18803 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18804
18805 SDValue Chain = ST->getChain();
18806 SDValue Ptr = ST->getBasePtr();
18807 // Lower value store.
18808 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18809 ST->getOriginalAlign(), MMOFlags, AAInfo);
18811 // Higher value store.
18812 SDValue St1 = DAG.getStore(
18813 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18814 ST->getOriginalAlign(), MMOFlags, AAInfo);
18815 return St1;
18816}
18817
18818/// Convert a disguised subvector insertion into a shuffle:
18819SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18820 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18821 "Expected extract_vector_elt");
18822 SDValue InsertVal = N->getOperand(1);
18823 SDValue Vec = N->getOperand(0);
18824
18825 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18826 // InsIndex)
18827 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
18828 // CONCAT_VECTORS.
18829 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18830 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18831 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18833 ArrayRef<int> Mask = SVN->getMask();
18834
18835 SDValue X = Vec.getOperand(0);
18836 SDValue Y = Vec.getOperand(1);
18837
18838 // Vec's operand 0 is using indices from 0 to N-1 and
18839 // operand 1 from N to 2N - 1, where N is the number of
18840 // elements in the vectors.
18841 SDValue InsertVal0 = InsertVal.getOperand(0);
18842 int ElementOffset = -1;
18843
18844 // We explore the inputs of the shuffle in order to see if we find the
18845 // source of the extract_vector_elt. If so, we can use it to modify the
18846 // shuffle rather than perform an insert_vector_elt.
18848 ArgWorkList.emplace_back(Mask.size(), Y);
18849 ArgWorkList.emplace_back(0, X);
18850
18851 while (!ArgWorkList.empty()) {
18852 int ArgOffset;
18854 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18855
18856 if (ArgVal == InsertVal0) {
18858 break;
18859 }
18860
18861 // Peek through concat_vector.
18862 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18863 int CurrentArgOffset =
18864 ArgOffset + ArgVal.getValueType().getVectorNumElements();
18865 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18866 for (SDValue Op : reverse(ArgVal->ops())) {
18867 CurrentArgOffset -= Step;
18868 ArgWorkList.emplace_back(CurrentArgOffset, Op);
18869 }
18870
18871 // Make sure we went through all the elements and did not screw up index
18872 // computation.
18874 }
18875 }
18876
18877 if (ElementOffset != -1) {
18878 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18879
18880 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18881 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18883 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18884 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18885
18888 Y, NewMask, DAG);
18889 if (LegalShuffle)
18890 return LegalShuffle;
18891 }
18892 }
18893
18894 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18895 // bitcast(shuffle (bitcast V), (extended X), Mask)
18896 // Note: We do not use an insert_subvector node because that requires a
18897 // legal subvector type.
18898 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18899 !InsertVal.getOperand(0).getValueType().isVector())
18900 return SDValue();
18901
18902 SDValue SubVec = InsertVal.getOperand(0);
18903 SDValue DestVec = N->getOperand(0);
18904 EVT SubVecVT = SubVec.getValueType();
18905 EVT VT = DestVec.getValueType();
18906 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18907 // If the source only has a single vector element, the cost of creating adding
18908 // it to a vector is likely to exceed the cost of a insert_vector_elt.
18909 if (NumSrcElts == 1)
18910 return SDValue();
18911 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18912 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18913
18914 // Step 1: Create a shuffle mask that implements this insert operation. The
18915 // vector that we are inserting into will be operand 0 of the shuffle, so
18916 // those elements are just 'i'. The inserted subvector is in the first
18917 // positions of operand 1 of the shuffle. Example:
18918 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18920 for (unsigned i = 0; i != NumMaskVals; ++i) {
18921 if (i / NumSrcElts == InsIndex)
18922 Mask[i] = (i % NumSrcElts) + NumMaskVals;
18923 else
18924 Mask[i] = i;
18925 }
18926
18927 // Bail out if the target can not handle the shuffle we want to create.
18928 EVT SubVecEltVT = SubVecVT.getVectorElementType();
18930 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18931 return SDValue();
18932
18933 // Step 2: Create a wide vector from the inserted source vector by appending
18934 // undefined elements. This is the same size as our destination vector.
18935 SDLoc DL(N);
18937 ConcatOps[0] = SubVec;
18939
18940 // Step 3: Shuffle in the padded subvector.
18943 AddToWorklist(PaddedSubV.getNode());
18944 AddToWorklist(DestVecBC.getNode());
18945 AddToWorklist(Shuf.getNode());
18946 return DAG.getBitcast(VT, Shuf);
18947}
18948
18949SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18950 SDValue InVec = N->getOperand(0);
18951 SDValue InVal = N->getOperand(1);
18952 SDValue EltNo = N->getOperand(2);
18953 SDLoc DL(N);
18954
18955 EVT VT = InVec.getValueType();
18957
18958 // Insert into out-of-bounds element is undefined.
18959 if (IndexC && VT.isFixedLengthVector() &&
18960 IndexC->getZExtValue() >= VT.getVectorNumElements())
18961 return DAG.getUNDEF(VT);
18962
18963 // Remove redundant insertions:
18964 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18965 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18966 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18967 return InVec;
18968
18969 if (!IndexC) {
18970 // If this is variable insert to undef vector, it might be better to splat:
18971 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18972 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18973 if (VT.isScalableVector())
18974 return DAG.getSplatVector(VT, DL, InVal);
18975 else {
18977 return DAG.getBuildVector(VT, DL, Ops);
18978 }
18979 }
18980 return SDValue();
18981 }
18982
18983 if (VT.isScalableVector())
18984 return SDValue();
18985
18986 unsigned NumElts = VT.getVectorNumElements();
18987
18988 // We must know which element is being inserted for folds below here.
18989 unsigned Elt = IndexC->getZExtValue();
18991 return Shuf;
18992
18993 // Canonicalize insert_vector_elt dag nodes.
18994 // Example:
18995 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18996 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18997 //
18998 // Do this only if the child insert_vector node has one use; also
18999 // do this only if indices are both constants and Idx1 < Idx0.
19000 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
19001 && isa<ConstantSDNode>(InVec.getOperand(2))) {
19002 unsigned OtherElt = InVec.getConstantOperandVal(2);
19003 if (Elt < OtherElt) {
19004 // Swap nodes.
19006 InVec.getOperand(0), InVal, EltNo);
19007 AddToWorklist(NewOp.getNode());
19008 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
19009 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
19010 }
19011 }
19012
19013 // If we can't generate a legal BUILD_VECTOR, exit
19014 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
19015 return SDValue();
19016
19017 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
19018 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
19019 // vector elements.
19021 // Do not combine these two vectors if the output vector will not replace
19022 // the input vector.
19023 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
19024 Ops.append(InVec.getNode()->op_begin(),
19025 InVec.getNode()->op_end());
19026 } else if (InVec.isUndef()) {
19027 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
19028 } else {
19029 return SDValue();
19030 }
19031 assert(Ops.size() == NumElts && "Unexpected vector size");
19032
19033 // Insert the element
19034 if (Elt < Ops.size()) {
19035 // All the operands of BUILD_VECTOR must have the same type;
19036 // we enforce that here.
19037 EVT OpVT = Ops[0].getValueType();
19038 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
19039 }
19040
19041 // Return the new vector
19042 return DAG.getBuildVector(VT, DL, Ops);
19043}
19044
19045SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
19046 SDValue EltNo,
19048 assert(OriginalLoad->isSimple());
19049
19050 EVT ResultVT = EVE->getValueType(0);
19051 EVT VecEltVT = InVecVT.getVectorElementType();
19052
19053 // If the vector element type is not a multiple of a byte then we are unable
19054 // to correctly compute an address to load only the extracted element as a
19055 // scalar.
19056 if (!VecEltVT.isByteSized())
19057 return SDValue();
19058
19059 ISD::LoadExtType ExtTy =
19063 return SDValue();
19064
19065 Align Alignment = OriginalLoad->getAlign();
19067 SDLoc DL(EVE);
19069 int Elt = ConstEltNo->getZExtValue();
19070 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
19071 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
19072 Alignment = commonAlignment(Alignment, PtrOff);
19073 } else {
19074 // Discard the pointer info except the address space because the memory
19075 // operand can't represent this new access since the offset is variable.
19076 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
19077 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
19078 }
19079
19080 bool IsFast = false;
19081 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
19082 OriginalLoad->getAddressSpace(), Alignment,
19083 OriginalLoad->getMemOperand()->getFlags(),
19084 &IsFast) ||
19085 !IsFast)
19086 return SDValue();
19087
19088 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
19089 InVecVT, EltNo);
19090
19091 // The replacement we need to do here is a little tricky: we need to
19092 // replace an extractelement of a load with a load.
19093 // Use ReplaceAllUsesOfValuesWith to do the replacement.
19094 // Note that this replacement assumes that the extractvalue is the only
19095 // use of the load; that's okay because we don't want to perform this
19096 // transformation in other cases anyway.
19097 SDValue Load;
19098 SDValue Chain;
19099 if (ResultVT.bitsGT(VecEltVT)) {
19100 // If the result type of vextract is wider than the load, then issue an
19101 // extending load instead.
19103 VecEltVT)
19105 : ISD::EXTLOAD;
19106 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
19107 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
19108 Alignment, OriginalLoad->getMemOperand()->getFlags(),
19109 OriginalLoad->getAAInfo());
19110 Chain = Load.getValue(1);
19111 } else {
19112 Load = DAG.getLoad(
19113 VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
19114 OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
19115 Chain = Load.getValue(1);
19116 if (ResultVT.bitsLT(VecEltVT))
19117 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
19118 else
19119 Load = DAG.getBitcast(ResultVT, Load);
19120 }
19121 WorklistRemover DeadNodes(*this);
19122 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
19123 SDValue To[] = { Load, Chain };
19124 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
19125 // Make sure to revisit this node to clean it up; it will usually be dead.
19126 AddToWorklist(EVE);
19127 // Since we're explicitly calling ReplaceAllUses, add the new node to the
19128 // worklist explicitly as well.
19129 AddToWorklistWithUsers(Load.getNode());
19130 ++OpsNarrowed;
19131 return SDValue(EVE, 0);
19132}
19133
19134/// Transform a vector binary operation into a scalar binary operation by moving
19135/// the math/logic after an extract element of a vector.
19137 bool LegalOperations) {
19138 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19139 SDValue Vec = ExtElt->getOperand(0);
19140 SDValue Index = ExtElt->getOperand(1);
19141 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19142 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
19143 Vec.getNode()->getNumValues() != 1)
19144 return SDValue();
19145
19146 // Targets may want to avoid this to prevent an expensive register transfer.
19147 if (!TLI.shouldScalarizeBinop(Vec))
19148 return SDValue();
19149
19150 // Extracting an element of a vector constant is constant-folded, so this
19151 // transform is just replacing a vector op with a scalar op while moving the
19152 // extract.
19153 SDValue Op0 = Vec.getOperand(0);
19154 SDValue Op1 = Vec.getOperand(1);
19155 if (isAnyConstantBuildVector(Op0, true) ||
19156 isAnyConstantBuildVector(Op1, true)) {
19157 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
19158 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
19159 SDLoc DL(ExtElt);
19160 EVT VT = ExtElt->getValueType(0);
19161 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
19162 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
19163 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
19164 }
19165
19166 return SDValue();
19167}
19168
19169SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
19170 SDValue VecOp = N->getOperand(0);
19171 SDValue Index = N->getOperand(1);
19172 EVT ScalarVT = N->getValueType(0);
19173 EVT VecVT = VecOp.getValueType();
19174 if (VecOp.isUndef())
19175 return DAG.getUNDEF(ScalarVT);
19176
19177 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
19178 //
19179 // This only really matters if the index is non-constant since other combines
19180 // on the constant elements already work.
19181 SDLoc DL(N);
19182 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
19183 Index == VecOp.getOperand(2)) {
19184 SDValue Elt = VecOp.getOperand(1);
19185 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
19186 }
19187
19188 // (vextract (scalar_to_vector val, 0) -> val
19189 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19190 // Only 0'th element of SCALAR_TO_VECTOR is defined.
19191 if (DAG.isKnownNeverZero(Index))
19192 return DAG.getUNDEF(ScalarVT);
19193
19194 // Check if the result type doesn't match the inserted element type. A
19195 // SCALAR_TO_VECTOR may truncate the inserted element and the
19196 // EXTRACT_VECTOR_ELT may widen the extracted vector.
19197 SDValue InOp = VecOp.getOperand(0);
19198 if (InOp.getValueType() != ScalarVT) {
19199 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19200 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19201 }
19202 return InOp;
19203 }
19204
19205 // extract_vector_elt of out-of-bounds element -> UNDEF
19206 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19207 if (IndexC && VecVT.isFixedLengthVector() &&
19208 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
19209 return DAG.getUNDEF(ScalarVT);
19210
19211 // extract_vector_elt (build_vector x, y), 1 -> y
19212 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
19213 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
19214 TLI.isTypeLegal(VecVT) &&
19215 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
19216 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
19217 VecVT.isFixedLengthVector()) &&
19218 "BUILD_VECTOR used for scalable vectors");
19219 unsigned IndexVal =
19220 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
19221 SDValue Elt = VecOp.getOperand(IndexVal);
19222 EVT InEltVT = Elt.getValueType();
19223
19224 // Sometimes build_vector's scalar input types do not match result type.
19225 if (ScalarVT == InEltVT)
19226 return Elt;
19227
19228 // TODO: It may be useful to truncate if free if the build_vector implicitly
19229 // converts.
19230 }
19231
19232 if (VecVT.isScalableVector())
19233 return SDValue();
19234
19235 // All the code from this point onwards assumes fixed width vectors, but it's
19236 // possible that some of the combinations could be made to work for scalable
19237 // vectors too.
19238 unsigned NumElts = VecVT.getVectorNumElements();
19239 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
19240
19241 // TODO: These transforms should not require the 'hasOneUse' restriction, but
19242 // there are regressions on multiple targets without it. We can end up with a
19243 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
19244 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
19245 VecOp.hasOneUse()) {
19246 // The vector index of the LSBs of the source depend on the endian-ness.
19247 bool IsLE = DAG.getDataLayout().isLittleEndian();
19248 unsigned ExtractIndex = IndexC->getZExtValue();
19249 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
19250 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
19251 SDValue BCSrc = VecOp.getOperand(0);
19252 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
19253 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
19254
19255 if (LegalTypes && BCSrc.getValueType().isInteger() &&
19256 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19257 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
19258 // trunc i64 X to i32
19259 SDValue X = BCSrc.getOperand(0);
19260 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
19261 "Extract element and scalar to vector can't change element type "
19262 "from FP to integer.");
19263 unsigned XBitWidth = X.getValueSizeInBits();
19265
19266 // An extract element return value type can be wider than its vector
19267 // operand element type. In that case, the high bits are undefined, so
19268 // it's possible that we may need to extend rather than truncate.
19271 "Scalar bitwidth must be a multiple of vector element bitwidth");
19272 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
19273 }
19274 }
19275 }
19276
19277 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
19278 return BO;
19279
19280 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
19281 // We only perform this optimization before the op legalization phase because
19282 // we may introduce new vector instructions which are not backed by TD
19283 // patterns. For example on AVX, extracting elements from a wide vector
19284 // without using extract_subvector. However, if we can find an underlying
19285 // scalar value, then we can always use that.
19286 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
19288 // Find the new index to extract from.
19289 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
19290
19291 // Extracting an undef index is undef.
19292 if (OrigElt == -1)
19293 return DAG.getUNDEF(ScalarVT);
19294
19295 // Select the right vector half to extract from.
19297 if (OrigElt < (int)NumElts) {
19298 SVInVec = VecOp.getOperand(0);
19299 } else {
19300 SVInVec = VecOp.getOperand(1);
19301 OrigElt -= NumElts;
19302 }
19303
19304 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
19305 SDValue InOp = SVInVec.getOperand(OrigElt);
19306 if (InOp.getValueType() != ScalarVT) {
19307 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19309 }
19310
19311 return InOp;
19312 }
19313
19314 // FIXME: We should handle recursing on other vector shuffles and
19315 // scalar_to_vector here as well.
19316
19317 if (!LegalOperations ||
19318 // FIXME: Should really be just isOperationLegalOrCustom.
19323 }
19324 }
19325
19326 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
19327 // simplify it based on the (valid) extraction indices.
19328 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
19329 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19330 Use->getOperand(0) == VecOp &&
19331 isa<ConstantSDNode>(Use->getOperand(1));
19332 })) {
19334 for (SDNode *Use : VecOp->uses()) {
19335 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
19336 if (CstElt->getAPIntValue().ult(NumElts))
19337 DemandedElts.setBit(CstElt->getZExtValue());
19338 }
19340 // We simplified the vector operand of this extract element. If this
19341 // extract is not dead, visit it again so it is folded properly.
19342 if (N->getOpcode() != ISD::DELETED_NODE)
19343 AddToWorklist(N);
19344 return SDValue(N, 0);
19345 }
19348 // We simplified the vector operand of this extract element. If this
19349 // extract is not dead, visit it again so it is folded properly.
19350 if (N->getOpcode() != ISD::DELETED_NODE)
19351 AddToWorklist(N);
19352 return SDValue(N, 0);
19353 }
19354 }
19355
19356 // Everything under here is trying to match an extract of a loaded value.
19357 // If the result of load has to be truncated, then it's not necessarily
19358 // profitable.
19359 bool BCNumEltsChanged = false;
19360 EVT ExtVT = VecVT.getVectorElementType();
19361 EVT LVT = ExtVT;
19362 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
19363 return SDValue();
19364
19365 if (VecOp.getOpcode() == ISD::BITCAST) {
19366 // Don't duplicate a load with other uses.
19367 if (!VecOp.hasOneUse())
19368 return SDValue();
19369
19370 EVT BCVT = VecOp.getOperand(0).getValueType();
19371 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
19372 return SDValue();
19373 if (NumElts != BCVT.getVectorNumElements())
19374 BCNumEltsChanged = true;
19375 VecOp = VecOp.getOperand(0);
19376 ExtVT = BCVT.getVectorElementType();
19377 }
19378
19379 // extract (vector load $addr), i --> load $addr + i * size
19380 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
19381 ISD::isNormalLoad(VecOp.getNode()) &&
19382 !Index->hasPredecessor(VecOp.getNode())) {
19384 if (VecLoad && VecLoad->isSimple())
19385 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
19386 }
19387
19388 // Perform only after legalization to ensure build_vector / vector_shuffle
19389 // optimizations have already been done.
19390 if (!LegalOperations || !IndexC)
19391 return SDValue();
19392
19393 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
19394 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
19395 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
19396 int Elt = IndexC->getZExtValue();
19397 LoadSDNode *LN0 = nullptr;
19398 if (ISD::isNormalLoad(VecOp.getNode())) {
19400 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19401 VecOp.getOperand(0).getValueType() == ExtVT &&
19402 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
19403 // Don't duplicate a load with other uses.
19404 if (!VecOp.hasOneUse())
19405 return SDValue();
19406
19407 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
19408 }
19410 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
19411 // =>
19412 // (load $addr+1*size)
19413
19414 // Don't duplicate a load with other uses.
19415 if (!VecOp.hasOneUse())
19416 return SDValue();
19417
19418 // If the bit convert changed the number of elements, it is unsafe
19419 // to examine the mask.
19420 if (BCNumEltsChanged)
19421 return SDValue();
19422
19423 // Select the input vector, guarding against out of range extract vector.
19424 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
19425 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
19426
19427 if (VecOp.getOpcode() == ISD::BITCAST) {
19428 // Don't duplicate a load with other uses.
19429 if (!VecOp.hasOneUse())
19430 return SDValue();
19431
19432 VecOp = VecOp.getOperand(0);
19433 }
19434 if (ISD::isNormalLoad(VecOp.getNode())) {
19436 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
19437 Index = DAG.getConstant(Elt, DL, Index.getValueType());
19438 }
19439 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19440 VecVT.getVectorElementType() == ScalarVT &&
19441 (!LegalTypes ||
19442 TLI.isTypeLegal(
19443 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19444 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19445 // -> extract_vector_elt a, 0
19446 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19447 // -> extract_vector_elt a, 1
19448 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19449 // -> extract_vector_elt b, 0
19450 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19451 // -> extract_vector_elt b, 1
19452 SDLoc SL(N);
19453 EVT ConcatVT = VecOp.getOperand(0).getValueType();
19454 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19455 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19456 Index.getValueType());
19457
19458 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19460 ConcatVT.getVectorElementType(),
19461 ConcatOp, NewIdx);
19462 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19463 }
19464
19465 // Make sure we found a non-volatile load and the extractelement is
19466 // the only use.
19467 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19468 return SDValue();
19469
19470 // If Idx was -1 above, Elt is going to be -1, so just return undef.
19471 if (Elt == -1)
19472 return DAG.getUNDEF(LVT);
19473
19474 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19475}
19476
19477// Simplify (build_vec (ext )) to (bitcast (build_vec ))
19478SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19479 // We perform this optimization post type-legalization because
19480 // the type-legalizer often scalarizes integer-promoted vectors.
19481 // Performing this optimization before may create bit-casts which
19482 // will be type-legalized to complex code sequences.
19483 // We perform this optimization only before the operation legalizer because we
19484 // may introduce illegal operations.
19485 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19486 return SDValue();
19487
19488 unsigned NumInScalars = N->getNumOperands();
19489 SDLoc DL(N);
19490 EVT VT = N->getValueType(0);
19491
19492 // Check to see if this is a BUILD_VECTOR of a bunch of values
19493 // which come from any_extend or zero_extend nodes. If so, we can create
19494 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19495 // optimizations. We do not handle sign-extend because we can't fill the sign
19496 // using shuffles.
19498 bool AllAnyExt = true;
19499
19500 for (unsigned i = 0; i != NumInScalars; ++i) {
19501 SDValue In = N->getOperand(i);
19502 // Ignore undef inputs.
19503 if (In.isUndef()) continue;
19504
19505 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
19506 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19507
19508 // Abort if the element is not an extension.
19509 if (!ZeroExt && !AnyExt) {
19511 break;
19512 }
19513
19514 // The input is a ZeroExt or AnyExt. Check the original type.
19515 EVT InTy = In.getOperand(0).getValueType();
19516
19517 // Check that all of the widened source types are the same.
19518 if (SourceType == MVT::Other)
19519 // First time.
19520 SourceType = InTy;
19521 else if (InTy != SourceType) {
19522 // Multiple income types. Abort.
19524 break;
19525 }
19526
19527 // Check if all of the extends are ANY_EXTENDs.
19528 AllAnyExt &= AnyExt;
19529 }
19530
19531 // In order to have valid types, all of the inputs must be extended from the
19532 // same source type and all of the inputs must be any or zero extend.
19533 // Scalar sizes must be a power of two.
19535 bool ValidTypes = SourceType != MVT::Other &&
19536 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19537 isPowerOf2_32(SourceType.getSizeInBits());
19538
19539 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19540 // turn into a single shuffle instruction.
19541 if (!ValidTypes)
19542 return SDValue();
19543
19544 // If we already have a splat buildvector, then don't fold it if it means
19545 // introducing zeros.
19546 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19547 return SDValue();
19548
19549 bool isLE = DAG.getDataLayout().isLittleEndian();
19550 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19551 assert(ElemRatio > 1 && "Invalid element size ratio");
19552 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19553 DAG.getConstant(0, DL, SourceType);
19554
19555 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19557
19558 // Populate the new build_vector
19559 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19560 SDValue Cast = N->getOperand(i);
19561 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19562 Cast.getOpcode() == ISD::ZERO_EXTEND ||
19563 Cast.isUndef()) && "Invalid cast opcode");
19564 SDValue In;
19565 if (Cast.isUndef())
19566 In = DAG.getUNDEF(SourceType);
19567 else
19568 In = Cast->getOperand(0);
19569 unsigned Index = isLE ? (i * ElemRatio) :
19570 (i * ElemRatio + (ElemRatio - 1));
19571
19572 assert(Index < Ops.size() && "Invalid index");
19573 Ops[Index] = In;
19574 }
19575
19576 // The type of the new BUILD_VECTOR node.
19578 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19579 "Invalid vector size");
19580 // Check if the new vector type is legal.
19581 if (!isTypeLegal(VecVT) ||
19582 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19584 return SDValue();
19585
19586 // Make the new BUILD_VECTOR.
19587 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19588
19589 // The new BUILD_VECTOR node has the potential to be further optimized.
19590 AddToWorklist(BV.getNode());
19591 // Bitcast to the desired type.
19592 return DAG.getBitcast(VT, BV);
19593}
19594
19595// Simplify (build_vec (trunc $1)
19596// (trunc (srl $1 half-width))
19597// (trunc (srl $1 (2 * half-width))) …)
19598// to (bitcast $1)
19599SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19600 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19601
19602 // Only for little endian
19603 if (!DAG.getDataLayout().isLittleEndian())
19604 return SDValue();
19605
19606 SDLoc DL(N);
19607 EVT VT = N->getValueType(0);
19609 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19610
19611 // Only for power of two types to be sure that bitcast works well
19613 return SDValue();
19614
19615 unsigned NumInScalars = N->getNumOperands();
19616
19617 // Look through bitcasts
19618 auto PeekThroughBitcast = [](SDValue Op) {
19619 if (Op.getOpcode() == ISD::BITCAST)
19620 return Op.getOperand(0);
19621 return Op;
19622 };
19623
19624 // The source value where all the parts are extracted.
19625 SDValue Src;
19626 for (unsigned i = 0; i != NumInScalars; ++i) {
19627 SDValue In = PeekThroughBitcast(N->getOperand(i));
19628 // Ignore undef inputs.
19629 if (In.isUndef()) continue;
19630
19631 if (In.getOpcode() != ISD::TRUNCATE)
19632 return SDValue();
19633
19634 In = PeekThroughBitcast(In.getOperand(0));
19635
19636 if (In.getOpcode() != ISD::SRL) {
19637 // For now only build_vec without shuffling, handle shifts here in the
19638 // future.
19639 if (i != 0)
19640 return SDValue();
19641
19642 Src = In;
19643 } else {
19644 // In is SRL
19645 SDValue part = PeekThroughBitcast(In.getOperand(0));
19646
19647 if (!Src) {
19648 Src = part;
19649 } else if (Src != part) {
19650 // Vector parts do not stem from the same variable
19651 return SDValue();
19652 }
19653
19654 SDValue ShiftAmtVal = In.getOperand(1);
19656 return SDValue();
19657
19658 uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19659
19660 // The extracted value is not extracted at the right position
19661 if (ShiftAmt != i * ScalarTypeBitsize)
19662 return SDValue();
19663 }
19664 }
19665
19666 // Only cast if the size is the same
19667 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19668 return SDValue();
19669
19670 return DAG.getBitcast(VT, Src);
19671}
19672
19673SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19676 unsigned LeftIdx, bool DidSplitVec) {
19678
19679 EVT VT = N->getValueType(0);
19680 EVT InVT1 = VecIn1.getValueType();
19681 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19682
19683 unsigned NumElems = VT.getVectorNumElements();
19684 unsigned ShuffleNumElems = NumElems;
19685
19686 // If we artificially split a vector in two already, then the offsets in the
19687 // operands will all be based off of VecIn1, even those in VecIn2.
19688 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19689
19691 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19692 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19693
19695 "Inputs must be sorted to be in non-increasing vector size order.");
19696
19697 // We can't generate a shuffle node with mismatched input and output types.
19698 // Try to make the types match the type of the output.
19699 if (InVT1 != VT || InVT2 != VT) {
19700 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19701 // If the output vector length is a multiple of both input lengths,
19702 // we can concatenate them and pad the rest with undefs.
19703 unsigned NumConcats = VTSize / InVT1Size;
19704 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19706 ConcatOps[0] = VecIn1;
19707 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19709 VecIn2 = SDValue();
19710 } else if (InVT1Size == VTSize * 2) {
19711 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19712 return SDValue();
19713
19714 if (!VecIn2.getNode()) {
19715 // If we only have one input vector, and it's twice the size of the
19716 // output, split it in two.
19720 // Since we now have shorter input vectors, adjust the offset of the
19721 // second vector's start.
19723 } else {
19725 "Second input is not going to be larger than the first one.");
19726
19727 // VecIn1 is wider than the output, and we have another, possibly
19728 // smaller input. Pad the smaller input with undefs, shuffle at the
19729 // input vector width, and extract the output.
19730 // The shuffle type is different than VT, so check legality again.
19731 if (LegalOperations &&
19733 return SDValue();
19734
19735 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19736 // lower it back into a BUILD_VECTOR. So if the inserted type is
19737 // illegal, don't even try.
19738 if (InVT1 != InVT2) {
19739 if (!TLI.isTypeLegal(InVT2))
19740 return SDValue();
19742 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19743 }
19745 }
19746 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19748 ConcatOps[0] = VecIn2;
19750 } else {
19751 // TODO: Support cases where the length mismatch isn't exactly by a
19752 // factor of 2.
19753 // TODO: Move this check upwards, so that if we have bad type
19754 // mismatches, we don't create any DAG nodes.
19755 return SDValue();
19756 }
19757 }
19758
19759 // Initialize mask to undef.
19761
19762 // Only need to run up to the number of elements actually used, not the
19763 // total number of elements in the shuffle - if we are shuffling a wider
19764 // vector, the high lanes should be set to undef.
19765 for (unsigned i = 0; i != NumElems; ++i) {
19766 if (VectorMask[i] <= 0)
19767 continue;
19768
19769 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19770 if (VectorMask[i] == (int)LeftIdx) {
19771 Mask[i] = ExtIndex;
19772 } else if (VectorMask[i] == (int)LeftIdx + 1) {
19773 Mask[i] = Vec2Offset + ExtIndex;
19774 }
19775 }
19776
19777 // The type the input vectors may have changed above.
19778 InVT1 = VecIn1.getValueType();
19779
19780 // If we already have a VecIn2, it should have the same type as VecIn1.
19781 // If we don't, get an undef/zero vector of the appropriate type.
19782 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19783 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19784
19785 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19787 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19788
19789 return Shuffle;
19790}
19791
19793 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19794
19795 // First, determine where the build vector is not undef.
19796 // TODO: We could extend this to handle zero elements as well as undefs.
19797 int NumBVOps = BV->getNumOperands();
19798 int ZextElt = -1;
19799 for (int i = 0; i != NumBVOps; ++i) {
19800 SDValue Op = BV->getOperand(i);
19801 if (Op.isUndef())
19802 continue;
19803 if (ZextElt == -1)
19804 ZextElt = i;
19805 else
19806 return SDValue();
19807 }
19808 // Bail out if there's no non-undef element.
19809 if (ZextElt == -1)
19810 return SDValue();
19811
19812 // The build vector contains some number of undef elements and exactly
19813 // one other element. That other element must be a zero-extended scalar
19814 // extracted from a vector at a constant index to turn this into a shuffle.
19815 // Also, require that the build vector does not implicitly truncate/extend
19816 // its elements.
19817 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
19818 EVT VT = BV->getValueType(0);
19819 SDValue Zext = BV->getOperand(ZextElt);
19820 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19821 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19822 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19823 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19824 return SDValue();
19825
19826 // The zero-extend must be a multiple of the source size, and we must be
19827 // building a vector of the same size as the source of the extract element.
19828 SDValue Extract = Zext.getOperand(0);
19829 unsigned DestSize = Zext.getValueSizeInBits();
19830 unsigned SrcSize = Extract.getValueSizeInBits();
19831 if (DestSize % SrcSize != 0 ||
19832 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19833 return SDValue();
19834
19835 // Create a shuffle mask that will combine the extracted element with zeros
19836 // and undefs.
19837 int ZextRatio = DestSize / SrcSize;
19840 for (int i = 0; i != NumMaskElts; ++i) {
19841 if (i / ZextRatio == ZextElt) {
19842 // The low bits of the (potentially translated) extracted element map to
19843 // the source vector. The high bits map to zero. We will use a zero vector
19844 // as the 2nd source operand of the shuffle, so use the 1st element of
19845 // that vector (mask value is number-of-elements) for the high bits.
19846 if (i % ZextRatio == 0)
19847 ShufMask[i] = Extract.getConstantOperandVal(1);
19848 else
19849 ShufMask[i] = NumMaskElts;
19850 }
19851
19852 // Undef elements of the build vector remain undef because we initialize
19853 // the shuffle mask with -1.
19854 }
19855
19856 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19857 // bitcast (shuffle V, ZeroVec, VectorMask)
19858 SDLoc DL(BV);
19859 EVT VecVT = Extract.getOperand(0).getValueType();
19860 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19861 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19862 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19863 ZeroVec, ShufMask, DAG);
19864 if (!Shuf)
19865 return SDValue();
19866 return DAG.getBitcast(VT, Shuf);
19867}
19868
19869// FIXME: promote to STLExtras.
template <typename R, typename T>
static auto getFirstIndexOf(R &&Range, const T &Val) {
  // Linear scan for \p Val; yields its zero-based position, or -1 (expressed
  // in the range's difference type) when the value is not present.
  auto Begin = std::begin(Range);
  auto End = std::end(Range);
  auto It = std::find(Begin, End, Val);
  using DiffT = decltype(std::distance(Begin, It));
  return It == End ? static_cast<DiffT>(-1) : std::distance(Begin, It);
}
19877
19878// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19879// operations. If the types of the vectors we're extracting from allow it,
19880// turn this into a vector_shuffle node.
19881SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19882 SDLoc DL(N);
19883 EVT VT = N->getValueType(0);
19884
19885 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19886 if (!isTypeLegal(VT))
19887 return SDValue();
19888
19890 return V;
19891
19892 // May only combine to shuffle after legalize if shuffle is legal.
19893 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19894 return SDValue();
19895
19896 bool UsesZeroVector = false;
19897 unsigned NumElems = N->getNumOperands();
19898
19899 // Record, for each element of the newly built vector, which input vector
19900 // that element comes from. -1 stands for undef, 0 for the zero vector,
19901 // and positive values for the input vectors.
19902 // VectorMask maps each element to its vector number, and VecIn maps vector
19903 // numbers to their initial SDValues.
19904
19907 VecIn.push_back(SDValue());
19908
19909 for (unsigned i = 0; i != NumElems; ++i) {
19910 SDValue Op = N->getOperand(i);
19911
19912 if (Op.isUndef())
19913 continue;
19914
19915 // See if we can use a blend with a zero vector.
19916 // TODO: Should we generalize this to a blend with an arbitrary constant
19917 // vector?
19918 if (isNullConstant(Op) || isNullFPConstant(Op)) {
19919 UsesZeroVector = true;
19920 VectorMask[i] = 0;
19921 continue;
19922 }
19923
19924 // Not an undef or zero. If the input is something other than an
19925 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19926 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19927 !isa<ConstantSDNode>(Op.getOperand(1)))
19928 return SDValue();
19929 SDValue ExtractedFromVec = Op.getOperand(0);
19930
19931 if (ExtractedFromVec.getValueType().isScalableVector())
19932 return SDValue();
19933
19934 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19935 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19936 return SDValue();
19937
19938 // All inputs must have the same element type as the output.
19939 if (VT.getVectorElementType() !=
19940 ExtractedFromVec.getValueType().getVectorElementType())
19941 return SDValue();
19942
19943 // Have we seen this input vector before?
19944 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19945 // a map back from SDValues to numbers isn't worth it.
19947 if (Idx == -1) { // A new source vector?
19948 Idx = VecIn.size();
19949 VecIn.push_back(ExtractedFromVec);
19950 }
19951
19952 VectorMask[i] = Idx;
19953 }
19954
19955 // If we didn't find at least one input vector, bail out.
19956 if (VecIn.size() < 2)
19957 return SDValue();
19958
19959 // If all the Operands of BUILD_VECTOR extract from same
19960 // vector, then split the vector efficiently based on the maximum
19961 // vector access index and adjust the VectorMask and
19962 // VecIn accordingly.
19963 bool DidSplitVec = false;
19964 if (VecIn.size() == 2) {
19965 unsigned MaxIndex = 0;
19966 unsigned NearestPow2 = 0;
19967 SDValue Vec = VecIn.back();
19968 EVT InVT = Vec.getValueType();
19970
19971 for (unsigned i = 0; i < NumElems; i++) {
19972 if (VectorMask[i] <= 0)
19973 continue;
19974 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19975 IndexVec[i] = Index;
19976 MaxIndex = std::max(MaxIndex, Index);
19977 }
19978
19979 NearestPow2 = PowerOf2Ceil(MaxIndex);
19980 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19981 NumElems * 2 < NearestPow2) {
19982 unsigned SplitSize = NearestPow2 / 2;
19984 InVT.getVectorElementType(), SplitSize);
19985 if (TLI.isTypeLegal(SplitVT) &&
19986 SplitSize + SplitVT.getVectorNumElements() <=
19987 InVT.getVectorNumElements()) {
19991 DAG.getVectorIdxConstant(0, DL));
19992 VecIn.pop_back();
19993 VecIn.push_back(VecIn1);
19994 VecIn.push_back(VecIn2);
19995 DidSplitVec = true;
19996
19997 for (unsigned i = 0; i < NumElems; i++) {
19998 if (VectorMask[i] <= 0)
19999 continue;
20000 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
20001 }
20002 }
20003 }
20004 }
20005
20006 // Sort input vectors by decreasing vector element count,
20007 // while preserving the relative order of equally-sized vectors.
20008 // Note that we keep the first "implicit zero vector as-is.
20011 [](const SDValue &a, const SDValue &b) {
20012 return a.getValueType().getVectorNumElements() >
20014 });
20015
20016 // We now also need to rebuild the VectorMask, because it referenced element
20017 // order in VecIn, and we just sorted them.
20018 for (int &SourceVectorIndex : VectorMask) {
20019 if (SourceVectorIndex <= 0)
20020 continue;
20022 assert(Idx > 0 && Idx < SortedVecIn.size() &&
20023 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
20025 }
20026
20027 VecIn = std::move(SortedVecIn);
20028
20029 // TODO: Should this fire if some of the input vectors has illegal type (like
20030 // it does now), or should we let legalization run its course first?
20031
20032 // Shuffle phase:
20033 // Take pairs of vectors, and shuffle them so that the result has elements
20034 // from these vectors in the correct places.
20035 // For example, given:
20036 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
20037 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
20038 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
20039 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
20040 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
20041 // We will generate:
20042 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
20043 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
20044 SmallVector<SDValue, 4> Shuffles;
20045 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
20046 unsigned LeftIdx = 2 * In + 1;
20049 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
20050
20053 Shuffles.push_back(Shuffle);
20054 else
20055 return SDValue();
20056 }
20057
20058 // If we need the zero vector as an "ingredient" in the blend tree, add it
20059 // to the list of shuffles.
20060 if (UsesZeroVector)
20061 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
20062 : DAG.getConstantFP(0.0, DL, VT));
20063
20064 // If we only have one shuffle, we're done.
20065 if (Shuffles.size() == 1)
20066 return Shuffles[0];
20067
20068 // Update the vector mask to point to the post-shuffle vectors.
20069 for (int &Vec : VectorMask)
20070 if (Vec == 0)
20071 Vec = Shuffles.size() - 1;
20072 else
20073 Vec = (Vec - 1) / 2;
20074
20075 // More than one shuffle. Generate a binary tree of blends, e.g. if from
20076 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
20077 // generate:
20078 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
20079 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
20080 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
20081 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
20082 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
20083 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
20084 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
20085
20086 // Make sure the initial size of the shuffle list is even.
20087 if (Shuffles.size() % 2)
20088 Shuffles.push_back(DAG.getUNDEF(VT));
20089
20090 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
20091 if (CurSize % 2) {
20092 Shuffles[CurSize] = DAG.getUNDEF(VT);
20093 CurSize++;
20094 }
20095 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
20096 int Left = 2 * In;
20097 int Right = 2 * In + 1;
20099 for (unsigned i = 0; i != NumElems; ++i) {
20100 if (VectorMask[i] == Left) {
20101 Mask[i] = i;
20102 VectorMask[i] = In;
20103 } else if (VectorMask[i] == Right) {
20104 Mask[i] = i + NumElems;
20105 VectorMask[i] = In;
20106 }
20107 }
20108
20109 Shuffles[In] =
20110 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
20111 }
20112 }
20113 return Shuffles[0];
20114}
20115
20116// Try to turn a build vector of zero extends of extract vector elts into a
20117// a vector zero extend and possibly an extract subvector.
20118// TODO: Support sign extend?
20119// TODO: Allow undef elements?
20120SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
20121 if (LegalOperations)
20122 return SDValue();
20123
20124 EVT VT = N->getValueType(0);
20125
20126 bool FoundZeroExtend = false;
20127 SDValue Op0 = N->getOperand(0);
20128 auto checkElem = [&](SDValue Op) -> int64_t {
20129 unsigned Opc = Op.getOpcode();
20131 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
20132 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20133 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
20134 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
20135 return C->getZExtValue();
20136 return -1;
20137 };
20138
20139 // Make sure the first element matches
20140 // (zext (extract_vector_elt X, C))
20141 // Offset must be a constant multiple of the
20142 // known-minimum vector length of the result type.
20143 int64_t Offset = checkElem(Op0);
20144 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
20145 return SDValue();
20146
20147 unsigned NumElems = N->getNumOperands();
20148 SDValue In = Op0.getOperand(0).getOperand(0);
20149 EVT InSVT = In.getValueType().getScalarType();
20151
20152 // Don't create an illegal input type after type legalization.
20153 if (LegalTypes && !TLI.isTypeLegal(InVT))
20154 return SDValue();
20155
20156 // Ensure all the elements come from the same vector and are adjacent.
20157 for (unsigned i = 1; i != NumElems; ++i) {
20158 if ((Offset + i) != checkElem(N->getOperand(i)))
20159 return SDValue();
20160 }
20161
20162 SDLoc DL(N);
20164 Op0.getOperand(0).getOperand(1));
20166 VT, In);
20167}
20168
20169SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
20170 EVT VT = N->getValueType(0);
20171
20172 // A vector built entirely of undefs is undef.
20174 return DAG.getUNDEF(VT);
20175
20176 // If this is a splat of a bitcast from another vector, change to a
20177 // concat_vector.
20178 // For example:
20179 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
20180 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
20181 //
20182 // If X is a build_vector itself, the concat can become a larger build_vector.
20183 // TODO: Maybe this is useful for non-splat too?
20184 if (!LegalOperations) {
20186 Splat = peekThroughBitcasts(Splat);
20187 EVT SrcVT = Splat.getValueType();
20188 if (SrcVT.isVector()) {
20189 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
20191 SrcVT.getVectorElementType(), NumElts);
20192 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
20193 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
20195 NewVT, Ops);
20196 return DAG.getBitcast(VT, Concat);
20197 }
20198 }
20199 }
20200 }
20201
20202 // Check if we can express BUILD VECTOR via subvector extract.
20203 if (!LegalTypes && (N->getNumOperands() > 1)) {
20204 SDValue Op0 = N->getOperand(0);
20205 auto checkElem = [&](SDValue Op) -> uint64_t {
20206 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
20207 (Op0.getOperand(0) == Op.getOperand(0)))
20208 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
20209 return CNode->getZExtValue();
20210 return -1;
20211 };
20212
20213 int Offset = checkElem(Op0);
20214 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
20215 if (Offset + i != checkElem(N->getOperand(i))) {
20216 Offset = -1;
20217 break;
20218 }
20219 }
20220
20221 if ((Offset == 0) &&
20222 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
20223 return Op0.getOperand(0);
20224 if ((Offset != -1) &&
20225 ((Offset % N->getValueType(0).getVectorNumElements()) ==
20226 0)) // IDX must be multiple of output size.
20227 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
20228 Op0.getOperand(0), Op0.getOperand(1));
20229 }
20230
20232 return V;
20233
20235 return V;
20236
20238 return V;
20239
20241 return V;
20242
20243 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
20244 // Do this late as some of the above may replace the splat.
20247 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
20248 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
20249 }
20250
20251 return SDValue();
20252}
20253
// Body of (presumably) combineConcatVectorOfScalars: fold a CONCAT_VECTORS
// whose operands are all bitcasts-from-scalar (or undef) into one
// BUILD_VECTOR of the scalars, bitcast back to the result type.
// NOTE(review): this listing has gaps — the function signature (line 20254)
// and the declarations of `Ops`/`ScalarUndef` (lines 20264, 20267) are not
// visible; confirm details against the upstream LLVM 14 DAGCombiner.cpp.
20255 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20256 EVT OpVT = N->getOperand(0).getValueType();
20257
20258 // If the operands are legal vectors, leave them alone.
20259 if (TLI.isTypeLegal(OpVT))
20260 return SDValue();
20261
20262 SDLoc DL(N);
20263 EVT VT = N->getValueType(0);
20265
// Start with an integer scalar type of the same width as one operand;
// may be switched to a floating-point type below if any FP scalar is seen.
20266 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
20268
20269 // Keep track of what we encounter.
20270 bool AnyInteger = false;
20271 bool AnyFP = false;
20272 for (const SDValue &Op : N->ops()) {
// Only bitcasts of non-vector (scalar) sources qualify; anything else
// aborts the whole fold.
20273 if (ISD::BITCAST == Op.getOpcode() &&
20274 !Op.getOperand(0).getValueType().isVector())
20275 Ops.push_back(Op.getOperand(0));
20276 else if (ISD::UNDEF == Op.getOpcode())
20277 Ops.push_back(ScalarUndef);
20278 else
20279 return SDValue();
20280
20281 // Note whether we encounter an integer or floating point scalar.
20282 // If it's neither, bail out, it could be something weird like x86mmx.
20283 EVT LastOpVT = Ops.back().getValueType();
20284 if (LastOpVT.isFloatingPoint())
20285 AnyFP = true;
20286 else if (LastOpVT.isInteger())
20287 AnyInteger = true;
20288 else
20289 return SDValue();
20290 }
20291
20292 // If any of the operands is a floating point scalar bitcast to a vector,
20293 // use floating point types throughout, and bitcast everything.
20294 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
20295 if (AnyFP) {
20296 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
// Mixed int/FP scalars: normalize every integer operand to the FP type.
20298 if (AnyInteger) {
20299 for (SDValue &Op : Ops) {
20300 if (Op.getValueType() == SVT)
20301 continue;
20302 if (Op.isUndef())
20303 Op = ScalarUndef;
20304 else
20305 Op = DAG.getBitcast(SVT, Op);
20306 }
20307 }
20308 }
20309
// Build a vector of the collected scalars and cast it to the original
// concat result type.
20310 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
20311 VT.getSizeInBits() / SVT.getSizeInBits());
20312 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
20313}
20314
20315// Attempt to merge nested concat_vectors/undefs.
20316// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
20317// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
// NOTE(review): listing gaps — the signature (line 20318), the declaration
// of `FirstConcat` (line 20324), part of the inner-type sanity check
// (line 20332) and the declaration of `ConcatOps` (line 20342) are not
// visible here; confirm against the upstream LLVM 14 source.
20319 SelectionDAG &DAG) {
20320 EVT VT = N->getValueType(0);
20321
20322 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
20323 EVT SubVT;
20325 for (const SDValue &Op : N->ops()) {
20326 if (Op.isUndef())
20327 continue;
20328 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
20329 return SDValue();
// First inner concat seen: remember it and record its sub-vector type so
// every later concat operand can be checked against it.
20330 if (!FirstConcat) {
20331 SubVT = Op.getOperand(0).getValueType();
20333 return SDValue();
20334 FirstConcat = Op;
20335 continue;
20336 }
20337 if (SubVT != Op.getOperand(0).getValueType())
20338 return SDValue();
20339 }
// At least one non-undef operand must exist (an all-undef concat is
// handled before this combine runs).
20340 assert(FirstConcat && "Concat of all-undefs found");
20341
// Flatten: undef operands expand to one undef per sub-vector of the inner
// concats; concat operands contribute their operands directly.
20343 for (const SDValue &Op : N->ops()) {
20344 if (Op.isUndef()) {
20345 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
20346 continue;
20347 }
20348 ConcatOps.append(Op->op_begin(), Op->op_end());
20349 }
20350 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
20351}
20352
20353// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
20354// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
20355// most two distinct vectors the same size as the result, attempt to turn this
20356// into a legal shuffle.
// NOTE(review): listing gaps — the signature (line 20357) and the
// declaration of `Mask` (line 20369, presumably a SmallVector<int>) are not
// visible; confirm against the upstream LLVM 14 source.
20358 EVT VT = N->getValueType(0);
20359 EVT OpVT = N->getOperand(0).getValueType();
20360
20361 // We currently can't generate an appropriate shuffle for a scalable vector.
20362 if (VT.isScalableVector())
20363 return SDValue();
20364
20365 int NumElts = VT.getVectorNumElements();
20366 int NumOpElts = OpVT.getVectorNumElements();
20367
// SV0/SV1 are the (at most two) distinct shuffle inputs; both start undef.
20368 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
20370
20371 for (SDValue Op : N->ops()) {
20372 Op = peekThroughBitcasts(Op);
20373
20374 // UNDEF nodes convert to UNDEF shuffle mask values.
20375 if (Op.isUndef()) {
20376 Mask.append((unsigned)NumOpElts, -1);
20377 continue;
20378 }
20379
20380 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20381 return SDValue();
20382
20383 // What vector are we extracting the subvector from and at what index?
20384 SDValue ExtVec = Op.getOperand(0);
20385 int ExtIdx = Op.getConstantOperandVal(1);
20386
20387 // We want the EVT of the original extraction to correctly scale the
20388 // extraction index.
20389 EVT ExtVT = ExtVec.getValueType();
20391
20392 // UNDEF nodes convert to UNDEF shuffle mask values.
20393 if (ExtVec.isUndef()) {
20394 Mask.append((unsigned)NumOpElts, -1);
20395 continue;
20396 }
20397
20398 // Ensure that we are extracting a subvector from a vector the same
20399 // size as the result.
20400 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
20401 return SDValue();
20402
20403 // Scale the subvector index to account for any bitcast.
20404 int NumExtElts = ExtVT.getVectorNumElements();
20405 if (0 == (NumExtElts % NumElts))
20406 ExtIdx /= (NumExtElts / NumElts);
20407 else if (0 == (NumElts % NumExtElts))
20408 ExtIdx *= (NumElts / NumExtElts);
20409 else
20410 return SDValue();
20411
20412 // At most we can reference 2 inputs in the final shuffle.
// Elements taken from SV1 are offset by NumElts, per VECTOR_SHUFFLE mask
// convention (indices >= NumElts select from the second input).
20413 if (SV0.isUndef() || SV0 == ExtVec) {
20414 SV0 = ExtVec;
20415 for (int i = 0; i != NumOpElts; ++i)
20416 Mask.push_back(i + ExtIdx);
20417 } else if (SV1.isUndef() || SV1 == ExtVec) {
20418 SV1 = ExtVec;
20419 for (int i = 0; i != NumOpElts; ++i)
20420 Mask.push_back(i + ExtIdx + NumElts);
20421 } else {
20422 return SDValue();
20423 }
20424 }
20425
// Let the target build (or reject) the final shuffle legally.
20426 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20427 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
20428 DAG.getBitcast(VT, SV1), Mask, DAG);
20429}
20430
// Body of combineConcatVectorOfCasts (name grounded by the call site in
// visitCONCAT_VECTORS below): fold a concat of identical int<->fp casts
// into a single cast of a wider concat.
// NOTE(review): listing gaps — the signature (line 20431), the `SrcOps`
// declaration (line 20455), the `ConcatSrcVT`/legality lines (20469,
// 20474, 20481) and the `NewConcat` construction (line 20490) are not
// visible; confirm against the upstream LLVM 14 source.
20432 unsigned CastOpcode = N->getOperand(0).getOpcode();
20433 switch (CastOpcode) {
20434 case ISD::SINT_TO_FP:
20435 case ISD::UINT_TO_FP:
20436 case ISD::FP_TO_SINT:
20437 case ISD::FP_TO_UINT:
20438 // TODO: Allow more opcodes?
20439 // case ISD::BITCAST:
20440 // case ISD::TRUNCATE:
20441 // case ISD::ZERO_EXTEND:
20442 // case ISD::SIGN_EXTEND:
20443 // case ISD::FP_EXTEND:
20444 break;
20445 default:
20446 return SDValue();
20447 }
20448
20449 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20450 if (!SrcVT.isVector())
20451 return SDValue();
20452
20453 // All operands of the concat must be the same kind of cast from the same
20454 // source type.
20456 for (SDValue Op : N->ops()) {
20457 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20458 Op.getOperand(0).getValueType() != SrcVT)
20459 return SDValue();
20460 SrcOps.push_back(Op.getOperand(0));
20461 }
20462
20463 // The wider cast must be supported by the target. This is unusual because
20464 // the operation support type parameter depends on the opcode. In addition,
20465 // check the other type in the cast to make sure this is really legal.
20466 EVT VT = N->getValueType(0);
20467 EVT SrcEltVT = SrcVT.getVectorElementType();
20468 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20470 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20471 switch (CastOpcode) {
20472 case ISD::SINT_TO_FP:
20473 case ISD::UINT_TO_FP:
// int->fp: operation legality is keyed on the (integer) source type; the
// first half of this condition (line 20474) is missing from the listing.
20475 !TLI.isTypeLegal(VT))
20476 return SDValue();
20477 break;
20478 case ISD::FP_TO_SINT:
20479 case ISD::FP_TO_UINT:
// fp->int: operation legality is keyed on the (integer) result type; the
// source-type legality half of this condition (line 20481) is missing.
20480 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20482 return SDValue();
20483 break;
20484 default:
20485 llvm_unreachable("Unexpected cast opcode");
20486 }
20487
20488 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
20489 SDLoc DL(N);
20491 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20492}
20493
// Combine a CONCAT_VECTORS node. Tries, in order: trivial single-operand
// and all-undef simplifications, concat-with-trailing-undefs folds,
// BUILD_VECTOR merging, the concat-of-* helper combines, and finally
// recognizing an identity concat of EXTRACT_SUBVECTORs of one source.
// NOTE(review): this listing is missing several lines (e.g. 20501 the
// all-undef check, 20519 the declaration of `Scalar`, 20553 the `NVT`
// declaration, 20570 the `Opnds` declaration, 20613/20618/20622 the calls
// to the concat-of-scalars/concats/extracts helpers, and 20635 the
// `SingleSource` declaration) — confirm against upstream LLVM 14 source.
20494SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20495 // If we only have one input vector, we don't need to do any concatenation.
20496 if (N->getNumOperands() == 1)
20497 return N->getOperand(0);
20498
20499 // Check if all of the operands are undefs.
20500 EVT VT = N->getValueType(0);
20502 return DAG.getUNDEF(VT);
20503
20504 // Optimize concat_vectors where all but the first of the vectors are undef.
20505 if (all_of(drop_begin(N->ops()),
20506 [](const SDValue &Op) { return Op.isUndef(); })) {
20507 SDValue In = N->getOperand(0);
20508 assert(In.getValueType().isVector() && "Must concat vectors");
20509
20510 // If the input is a concat_vectors, just make a larger concat by padding
20511 // with smaller undefs.
20512 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20513 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20514 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20515 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20516 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20517 }
20518
20520
20521 // concat_vectors(scalar_to_vector(scalar), undef) ->
20522 // scalar_to_vector(scalar)
20523 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20524 Scalar.hasOneUse()) {
20525 EVT SVT = Scalar.getValueType().getVectorElementType();
20526 if (SVT == Scalar.getOperand(0).getValueType())
20527 Scalar = Scalar.getOperand(0);
20528 }
20529
20530 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20531 if (!Scalar.getValueType().isVector()) {
20532 // If the bitcast type isn't legal, it might be a trunc of a legal type;
20533 // look through the trunc so we can still do the transform:
20534 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20535 if (Scalar->getOpcode() == ISD::TRUNCATE &&
20536 !TLI.isTypeLegal(Scalar.getValueType()) &&
20537 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20538 Scalar = Scalar->getOperand(0);
20539
20540 EVT SclTy = Scalar.getValueType();
20541
20542 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20543 return SDValue();
20544
20545 // Bail out if the vector size is not a multiple of the scalar size.
20546 if (VT.getSizeInBits() % SclTy.getSizeInBits())
20547 return SDValue();
20548
20549 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20550 if (VNTNumElms < 2)
20551 return SDValue();
20552
// NVT (declared on the missing line 20553) is presumably the
// VNTNumElms-wide vector of SclTy used for the SCALAR_TO_VECTOR below.
20554 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20555 return SDValue();
20556
20557 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20558 return DAG.getBitcast(VT, Res);
20559 }
20560 }
20561
20562 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20563 // We have already tested above for an UNDEF only concatenation.
20564 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20565 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20566 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20567 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20568 };
20569 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20571 EVT SVT = VT.getScalarType();
20572
20573 EVT MinVT = SVT;
20574 if (!SVT.isFloatingPoint()) {
20575 // If the BUILD_VECTORs are built from integers, they may have different
20576 // operand types. Get the smallest type and truncate all operands to it.
20577 bool FoundMinVT = false;
20578 for (const SDValue &Op : N->ops())
20579 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20580 EVT OpSVT = Op.getOperand(0).getValueType();
20581 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20582 FoundMinVT = true;
20583 }
20584 assert(FoundMinVT && "Concat vector type mismatch");
20585 }
20586
20587 for (const SDValue &Op : N->ops()) {
20588 EVT OpVT = Op.getValueType();
20589 unsigned NumElts = OpVT.getVectorNumElements();
20590
20591 if (ISD::UNDEF == Op.getOpcode())
20592 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20593
20594 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20595 if (SVT.isFloatingPoint()) {
20596 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20597 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20598 } else {
// Integer path: normalize every scalar operand to MinVT via TRUNCATE.
20599 for (unsigned i = 0; i != NumElts; ++i)
20600 Opnds.push_back(
20601 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20602 }
20603 }
20604 }
20605
20606 assert(VT.getVectorNumElements() == Opnds.size() &&
20607 "Concat vector type mismatch");
20608 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20609 }
20610
20611 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20612 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20614 return V;
20615
20616 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20617 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20619 return V;
20620
20621 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20623 return V;
20624 }
20625
20626 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20627 return V;
20628
20629 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20630 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
20631 // operands and look for a CONCAT operations that place the incoming vectors
20632 // at the exact same location.
20633 //
20634 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
20636 unsigned PartNumElem =
20637 N->getOperand(0).getValueType().getVectorMinNumElements();
20638
20639 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20640 SDValue Op = N->getOperand(i);
20641
20642 if (Op.isUndef())
20643 continue;
20644
20645 // Check if this is the identity extract:
20646 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20647 return SDValue();
20648
20649 // Find the single incoming vector for the extract_subvector.
20650 if (SingleSource.getNode()) {
20651 if (Op.getOperand(0) != SingleSource)
20652 return SDValue();
20653 } else {
20654 SingleSource = Op.getOperand(0);
20655
20656 // Check the source type is the same as the type of the result.
20657 // If not, this concat may extend the vector, so we can not
20658 // optimize it away.
20659 if (SingleSource.getValueType() != N->getValueType(0))
20660 return SDValue();
20661 }
20662
20663 // Check that we are reading from the identity index.
20664 unsigned IdentityIndex = i * PartNumElem;
20665 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20666 return SDValue();
20667 }
20668
// Every (non-undef) operand was an identity extract of the same source:
// the concat is a no-op and can be replaced by that source.
20669 if (SingleSource.getNode())
20670 return SingleSource;
20671
20672 return SDValue();
20673}
20674
20675// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20676// if the subvector can be sourced for free.
// NOTE(review): the signature (line 20677) is missing from this listing —
// from the body it takes a vector `V`, a subvector type `SubVT`, and an
// index operand `Index`; confirm name/signature against upstream LLVM 14.
20678 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20679 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
// V inserts exactly the subvector we want, at the index we want.
20680 return V.getOperand(1);
20681 }
20682 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
// For a concat, a constant index that lands on a SubVT-sized operand
// boundary selects one concat operand directly.
20683 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20684 V.getOperand(0).getValueType() == SubVT &&
20685 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20686 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20687 return V.getOperand(SubIdx);
20688 }
20689 return SDValue();
20690}
20691
// Body of narrowInsertExtractVectorBinOp (name grounded by the call in
// narrowExtractedVectorBinOp below): if we extract a subvector of a binop
// whose operands both insert/concat that same subvector slot, use the
// inserted subvectors directly and build a narrow binop.
// NOTE(review): the first signature line (20692) and the lines computing
// `Sub0`/`Sub1` (20711-20712, presumably via the subvector-source helper
// above) are missing from this listing; confirm against upstream LLVM 14.
20693 SelectionDAG &DAG,
20694 bool LegalOperations) {
20695 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20696 SDValue BinOp = Extract->getOperand(0);
20697 unsigned BinOpcode = BinOp.getOpcode();
20698 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20699 return SDValue();
20700
20701 EVT VecVT = BinOp.getValueType();
20702 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20703 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20704 return SDValue();
20705
20706 SDValue Index = Extract->getOperand(1);
20707 EVT SubVT = Extract->getValueType(0);
// The narrow binop we are about to create must be supported by the target.
20708 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20709 return SDValue();
20710
20713
20714 // TODO: We could handle the case where only 1 operand is being inserted by
20715 // creating an extract of the other operand, but that requires checking
20716 // number of uses and/or costs.
20717 if (!Sub0 || !Sub1)
20718 return SDValue();
20719
20720 // We are inserting both operands of the wide binop only to extract back
20721 // to the narrow vector size. Eliminate all of the insert/extract:
20722 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20723 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20724 BinOp->getFlags());
20725}
20726
20727/// If we are extracting a subvector produced by a wide binary operator try
20728/// to use a narrow binary operator and/or avoid concatenation and extraction.
// NOTE(review): listing gaps — the first signature line (20729), the
// `ExtractIndexC` definition (20738), the extract-index assert head
// (20769), the `NarrowingRatio` definition (20780), the narrow-binop
// legality check (20787-20788), the `ConcatOpNum` computation (20795), the
// cheap-extract condition head (20797), and the construction lines for
// `NewExtIndex`/`X`/`Y`/`NarrowBinOp`/`IndexC` (20801-20806, 20841-20850)
// are missing; confirm against upstream LLVM 14 DAGCombiner.cpp.
20730 bool LegalOperations) {
20731 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20732 // some of these bailouts with other transforms.
20733
20734 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20735 return V;
20736
20737 // The extract index must be a constant, so we can map it to a concat operand.
20739 if (!ExtractIndexC)
20740 return SDValue();
20741
20742 // We are looking for an optionally bitcasted wide vector binary operator
20743 // feeding an extract subvector.
20744 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20745 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20746 unsigned BOpcode = BinOp.getOpcode();
20747 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20748 return SDValue();
20749
20750 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20751 // reduced to the unary fneg when it is visited, and we probably want to deal
20752 // with fneg in a target-specific way.
20753 if (BOpcode == ISD::FSUB) {
20754 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20755 if (C && C->getValueAPF().isNegZero())
20756 return SDValue();
20757 }
20758
20759 // The binop must be a vector type, so we can extract some fraction of it.
20760 EVT WideBVT = BinOp.getValueType();
20761 // The optimisations below currently assume we are dealing with fixed length
20762 // vectors. It is possible to add support for scalable vectors, but at the
20763 // moment we've done no analysis to prove whether they are profitable or not.
20764 if (!WideBVT.isFixedLengthVector())
20765 return SDValue();
20766
20767 EVT VT = Extract->getValueType(0);
20768 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20770 "Extract index is not a multiple of the vector length.");
20771
20772 // Bail out if this is not a proper multiple width extraction.
20773 unsigned WideWidth = WideBVT.getSizeInBits();
20774 unsigned NarrowWidth = VT.getSizeInBits();
20775 if (WideWidth % NarrowWidth != 0)
20776 return SDValue();
20777
20778 // Bail out if we are extracting a fraction of a single operation. This can
20779 // occur because we potentially looked through a bitcast of the binop.
20781 unsigned WideNumElts = WideBVT.getVectorNumElements();
20782 if (WideNumElts % NarrowingRatio != 0)
20783 return SDValue();
20784
20785 // Bail out if the target does not support a narrower version of the binop.
20786 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20789 return SDValue();
20790
20791 // If extraction is cheap, we don't need to look at the binop operands
20792 // for concat ops. The narrow binop alone makes this transform profitable.
20793 // We can't just reuse the original extract index operand because we may have
20794 // bitcasted.
20796 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20798 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20799 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20800 SDLoc DL(Extract);
20803 BinOp.getOperand(0), NewExtIndex);
20805 BinOp.getOperand(1), NewExtIndex);
20807 BinOp.getNode()->getFlags());
20808 return DAG.getBitcast(VT, NarrowBinOp);
20809 }
20810
20811 // Only handle the case where we are doubling and then halving. A larger ratio
20812 // may require more than two narrow binops to replace the wide binop.
20813 if (NarrowingRatio != 2)
20814 return SDValue();
20815
20816 // TODO: The motivating case for this transform is an x86 AVX1 target. That
20817 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20818 // flavors, but no other 256-bit integer support. This could be extended to
20819 // handle any binop, but that may require fixing/adding other folds to avoid
20820 // codegen regressions.
20821 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20822 return SDValue();
20823
20824 // We need at least one concatenation operation of a binop operand to make
20825 // this transform worthwhile. The concat must double the input vector sizes.
20826 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20827 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20828 return V.getOperand(ConcatOpNum);
20829 return SDValue();
20830 };
20833
20834 if (SubVecL || SubVecR) {
20835 // If a binop operand was not the result of a concat, we must extract a
20836 // half-sized operand for our new narrow binop:
20837 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20838 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20839 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20840 SDLoc DL(Extract);
20844 BinOp.getOperand(0), IndexC);
20845
20848 BinOp.getOperand(1), IndexC);
20849
20851 return DAG.getBitcast(VT, NarrowBinOp);
20852 }
20853
20854 return SDValue();
20855}
20856
20857/// If we are extracting a subvector from a wide vector load, convert to a
20858/// narrow load to eliminate the extraction:
20859/// (extract_subvector (load wide vector)) --> (load narrow vector)
// NOTE(review): listing gaps — the signature (line 20860) and the
// `MF`/`StoreSize` definitions (lines 20898-20899) are missing; confirm
// against the upstream LLVM 14 source.
20861 // TODO: Add support for big-endian. The offset calculation must be adjusted.
20862 if (DAG.getDataLayout().isBigEndian())
20863 return SDValue();
20864
// Only plain (non-extending), non-volatile/non-atomic loads qualify.
20865 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20866 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
20867 return SDValue();
20868
20869 // Allow targets to opt-out.
20870 EVT VT = Extract->getValueType(0);
20871
20872 // We can only create byte sized loads.
20873 if (!VT.isByteSized())
20874 return SDValue();
20875
20876 unsigned Index = Extract->getConstantOperandVal(1);
20877 unsigned NumElts = VT.getVectorMinNumElements();
20878
20879 // The definition of EXTRACT_SUBVECTOR states that the index must be a
20880 // multiple of the minimum number of elements in the result type.
20881 assert(Index % NumElts == 0 && "The extract subvector index is not a "
20882 "multiple of the result's element count");
20883
20884 // It's fine to use TypeSize here as we know the offset will not be negative.
20885 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20886
20887 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20888 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20889 return SDValue();
20890
20891 // The narrow load will be offset from the base address of the old load if
20892 // we are extracting from something besides index 0 (little-endian).
20893 SDLoc DL(Extract);
20894
20895 // TODO: Use "BaseIndexOffset" to make this more effective.
20896 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20897
20900 MachineMemOperand *MMO;
// For scalable offsets the exact byte offset is unknown at compile time,
// so the memory operand is rebuilt from just the address space.
20901 if (Offset.isScalable()) {
20902 MachinePointerInfo MPI =
20903 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20904 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20905 } else
20906 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20907 StoreSize);
20908
// Create the narrow load and keep it ordered with the original load's
// memory chain.
20909 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20910 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20911 return NewLd;
20912}
20913
20914/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
20915/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
20916/// EXTRACT_SUBVECTOR(Op?, ?),
20917/// Mask'))
20918/// iff it is legal and profitable to do so. Notably, the trimmed mask
20919/// (containing only the elements that are extracted)
20920/// must reference at most two subvectors.
// NOTE(review): listing gaps — the signature (20921), the
// `WideShuffleVector` cast (20937), the shuffle legality condition tail
// (20947), assert heads (20952, 20979, 20987, 20990), the `NewMask` and
// `DemandedSubvectors` declarations (20957, 20960), the subvector-index
// computations (20983, 20985), the insert/size logic (21009), the
// `NewOpIdx`/`AdjM` computations (21021, 21024), the narrow-mask legality
// check (21040) and the `NewOps` declaration/loop head (21045, 21047) are
// missing; confirm against the upstream LLVM 14 source.
20922 SelectionDAG &DAG,
20923 const TargetLowering &TLI,
20924 bool LegalOperations) {
20925 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20926 "Must only be called on EXTRACT_SUBVECTOR's");
20927
20928 SDValue N0 = N->getOperand(0);
20929
20930 // Only deal with non-scalable vectors.
20931 EVT NarrowVT = N->getValueType(0);
20932 EVT WideVT = N0.getValueType();
20933 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
20934 return SDValue();
20935
20936 // The operand must be a shufflevector.
20938 if (!WideShuffleVector)
20939 return SDValue();
20940
20941 // The old shuffle needs to go away.
20942 if (!WideShuffleVector->hasOneUse())
20943 return SDValue();
20944
20945 // And the narrow shufflevector that we'll form must be legal.
20946 if (LegalOperations &&
20948 return SDValue();
20949
20950 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
20951 int NumEltsExtracted = NarrowVT.getVectorNumElements();
20953 "Extract index is not a multiple of the output vector length.");
20954
20955 int WideNumElts = WideVT.getVectorNumElements();
20956
20958 NewMask.reserve(NumEltsExtracted);
20959 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
20961
20962 // Try to decode the wide mask into narrow mask from at most two subvectors.
20963 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
20965 assert((M >= -1) && (M < (2 * WideNumElts)) &&
20966 "Out-of-bounds shuffle mask?");
20967
20968 if (M < 0) {
20969 // Does not depend on operands, does not require adjustment.
20970 NewMask.emplace_back(M);
20971 continue;
20972 }
20973
20974 // From which operand of the shuffle does this shuffle mask element pick?
20975 int WideShufOpIdx = M / WideNumElts;
20976 // Which element of that operand is picked?
20977 int OpEltIdx = M % WideNumElts;
20978
20980 "Shuffle mask vector decomposition failure.");
20981
20982 // And which NumEltsExtracted-sized subvector of that operand is that?
20984 // And which element within that subvector of that operand is that?
20986
20988 "Shuffle mask subvector decomposition failure.");
20989
20991 WideShufOpIdx * WideNumElts) == M &&
20992 "Shuffle mask full decomposition failure.");
20993
20994 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
20995
20996 if (Op.isUndef()) {
20997 // Picking from an undef operand. Let's adjust mask instead.
20998 NewMask.emplace_back(-1);
20999 continue;
21000 }
21001
21002 // Profitability check: only deal with extractions from the first subvector.
21003 if (OpSubvecIdx != 0)
21004 return SDValue();
21005
// Record the (operand, subvector) pair this mask element demands; the
// set's size bounds how many narrow extracts we would need.
21006 const std::pair<SDValue, int> DemandedSubvector =
21007 std::make_pair(Op, OpSubvecIdx);
21008
21010 if (DemandedSubvectors.size() > 2)
21011 return SDValue(); // We can't handle more than two subvectors.
21012 // How many elements into the WideVT does this subvector start?
21013 int Index = NumEltsExtracted * OpSubvecIdx;
21014 // Bail out if the extraction isn't going to be cheap.
21015 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
21016 return SDValue();
21017 }
21018
21019 // Ok, but from which operand of the new shuffle will this element pick?
21020 int NewOpIdx =
21022 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
21023
21025 NewMask.emplace_back(AdjM);
21026 }
21027 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
21028 assert(DemandedSubvectors.size() <= 2 &&
21029 "Should have ended up demanding at most two subvectors.");
21030
21031 // Did we discover that the shuffle does not actually depend on operands?
21032 if (DemandedSubvectors.empty())
21033 return DAG.getUNDEF(NarrowVT);
21034
21035 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
21036 // operand[s]/index[es], so there is no point in checking for its legality.
21037
21038 // Do not turn a legal shuffle into an illegal one.
21039 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
21041 return SDValue();
21042
21043 SDLoc DL(N);
21044
// Materialize one narrow EXTRACT_SUBVECTOR per demanded subvector; these
// become the operands of the new narrow shuffle.
21046 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
21048 // How many elements into the WideVT does this subvector start?
21049 int Index = NumEltsExtracted * DemandedSubvector.second;
21050 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
21051 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
21052 DemandedSubvector.first, IndexC));
21053 }
21054 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
21055 "Should end up with either one or two ops");
21056
21057 // If we ended up with only one operand, pad with an undef.
21058 if (NewOps.size() == 1)
21059 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
21060
21061 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
21062}
21063
// Combine an EXTRACT_SUBVECTOR node. Tries, in order: extract-of-undef,
// narrowing a wide load, merging nested extracts, commuting with bitcasts,
// peeking through CONCAT_VECTORS, the shuffle fold above, shrinking a
// BUILD_VECTOR source, simplifying through INSERT_SUBVECTOR, and narrowing
// an extracted binop.
// NOTE(review): this listing is missing several lines (e.g. 21073-21074
// the narrow-load fold call defining `NarrowLoad`, 21082 part of the
// nested-extract condition, 21102-21105 and 21113/21120-21124/21129-21132
// parts of the bitcast-scaling paths, 21164-21167/21171 parts of the
// concat path defining `NewExtIdx`/`NewIndexC`, 21194/21197 parts of the
// BUILD_VECTOR legality condition, 21209 the smaller `BuildVec`
// construction, 21237 the first args of the INSERT_SUBVECTOR fold, and
// 21244-21245 a SimplifyDemandedVectorElts-style call) — confirm against
// the upstream LLVM 14 source.
21064SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
21065 EVT NVT = N->getValueType(0);
21066 SDValue V = N->getOperand(0);
21067 uint64_t ExtIdx = N->getConstantOperandVal(1);
21068
21069 // Extract from UNDEF is UNDEF.
21070 if (V.isUndef())
21071 return DAG.getUNDEF(NVT);
21072
21075 return NarrowLoad;
21076
21077 // Combine an extract of an extract into a single extract_subvector.
21078 // ext (ext X, C), 0 --> ext X, C
21079 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
21080 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
21081 V.getConstantOperandVal(1)) &&
21083 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
21084 V.getOperand(1));
21085 }
21086 }
21087
21088 // Try to move vector bitcast after extract_subv by scaling extraction index:
21089 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
21090 if (V.getOpcode() == ISD::BITCAST &&
21091 V.getOperand(0).getValueType().isVector() &&
21092 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
21093 SDValue SrcOp = V.getOperand(0);
21094 EVT SrcVT = SrcOp.getValueType();
21095 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
21096 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
// Case 1: the bitcast narrows elements (more source elements per dest
// element) — scale the extract up by the ratio.
21097 if ((SrcNumElts % DestNumElts) == 0) {
21098 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
21099 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
21100 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
21101 NewExtEC);
21103 SDLoc DL(N);
21106 V.getOperand(0), NewIndex);
21107 return DAG.getBitcast(NVT, NewExtract);
21108 }
21109 }
// Case 2: the bitcast widens elements — divide the extract size/index by
// the ratio, when the division is exact.
21110 if ((DestNumElts % SrcNumElts) == 0) {
21111 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
21112 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
21114 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
21115 EVT ScalarVT = SrcVT.getScalarType();
21116 if ((ExtIdx % DestSrcRatio) == 0) {
21117 SDLoc DL(N);
21118 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
21119 EVT NewExtVT =
21125 V.getOperand(0), NewIndex);
21126 return DAG.getBitcast(NVT, NewExtract);
21127 }
// Degenerate sub-case: the scaled extraction is a single scalar element.
21128 if (NewExtEC.isScalar() &&
21133 V.getOperand(0), NewIndex);
21134 return DAG.getBitcast(NVT, NewExtract);
21135 }
21136 }
21137 }
21138 }
21139 }
21140
21141 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
21142 unsigned ExtNumElts = NVT.getVectorMinNumElements();
21143 EVT ConcatSrcVT = V.getOperand(0).getValueType();
21144 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
21145 "Concat and extract subvector do not change element type");
21146 assert((ExtIdx % ExtNumElts) == 0 &&
21147 "Extract index is not a multiple of the input vector length.");
21148
21149 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
21150 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
21151
21152 // If the concatenated source types match this extract, it's a direct
21153 // simplification:
21154 // extract_subvec (concat V1, V2, ...), i --> Vi
21155 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
21156 return V.getOperand(ConcatOpIdx);
21157
21158 // If the concatenated source vectors are a multiple length of this extract,
21159 // then extract a fraction of one of those source vectors directly from a
21160 // concat operand. Example:
21161 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
21162 // v2i8 extract_subvec v8i8 Y, 6
21163 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
21165 SDLoc DL(N);
21168 "Trying to extract from >1 concat operand?");
21169 assert(NewExtIdx % ExtNumElts == 0 &&
21170 "Extract index is not a multiple of the input vector length.");
21172 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
21173 V.getOperand(ConcatOpIdx), NewIndexC);
21174 }
21175 }
21176
21177 if (SDValue V =
21178 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
21179 return V;
21180
21181 V = peekThroughBitcasts(V);
21182
21183 // If the input is a build vector. Try to make a smaller build vector.
21184 if (V.getOpcode() == ISD::BUILD_VECTOR) {
21185 EVT InVT = V.getValueType();
21186 unsigned ExtractSize = NVT.getSizeInBits();
21187 unsigned EltSize = InVT.getScalarSizeInBits();
21188 // Only do this if we won't split any elements.
21189 if (ExtractSize % EltSize == 0) {
21190 unsigned NumElems = ExtractSize / EltSize;
21191 EVT EltVT = InVT.getVectorElementType();
// A single-element result is handled as a scalar (EltVT) rather than a
// one-element vector.
21192 EVT ExtractVT =
21193 NumElems == 1 ? EltVT
21195 if ((Level < AfterLegalizeDAG ||
21196 (NumElems == 1 ||
21198 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
21199 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
21200
21201 if (NumElems == 1) {
21202 SDValue Src = V->getOperand(IdxVal);
21203 if (EltVT != Src.getValueType())
21204 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
21205 return DAG.getBitcast(NVT, Src);
21206 }
21207
21208 // Extract the pieces from the original build_vector.
21210 V->ops().slice(IdxVal, NumElems));
21211 return DAG.getBitcast(NVT, BuildVec);
21212 }
21213 }
21214 }
21215
21216 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
21217 // Handle only simple case where vector being inserted and vector
21218 // being extracted are of same size.
21219 EVT SmallVT = V.getOperand(1).getValueType();
21220 if (!NVT.bitsEq(SmallVT))
21221 return SDValue();
21222
21223 // Combine:
21224 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
21225 // Into:
21226 // indices are equal or bit offsets are equal => V1
21227 // otherwise => (extract_subvec V1, ExtIdx)
21228 uint64_t InsIdx = V.getConstantOperandVal(2);
21229 if (InsIdx * SmallVT.getScalarSizeInBits() ==
21230 ExtIdx * NVT.getScalarSizeInBits()) {
21231 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
21232 return SDValue();
21233
// The extract reads back exactly the inserted subvector.
21234 return DAG.getBitcast(NVT, V.getOperand(1));
21235 }
// Otherwise the insert is irrelevant to this extract: extract from the
// base vector (V1) instead.
21236 return DAG.getNode(
21238 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
21239 N->getOperand(1));
21240 }
21241
21242 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
21243 return NarrowBOp;
21244
21245 return SDValue(N, 0);
21246
21247 return SDValue();
21248}
21250
21251/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
21252/// followed by concatenation. Narrow vector ops may have better performance
21253/// than wide ops, and this can unlock further narrowing of other vector ops.
21254/// Targets can invert this transform later if it is not profitable.
21256 SelectionDAG &DAG) {
21257 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
21258 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
21259 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
21260 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
21261 return SDValue();
21262
21263 // Split the wide shuffle mask into halves. Any mask element that is accessing
21264 // operand 1 is offset down to account for narrowing of the vectors.
21265 ArrayRef<int> Mask = Shuf->getMask();
21266 EVT VT = Shuf->getValueType(0);
21267 unsigned NumElts = VT.getVectorNumElements();
21268 unsigned HalfNumElts = NumElts / 2;
21271 for (unsigned i = 0; i != NumElts; ++i) {
21272 if (Mask[i] == -1)
21273 continue;
21274 // If we reference the upper (undef) subvector then the element is undef.
21275 if ((Mask[i] % NumElts) >= HalfNumElts)
21276 continue;
21277 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
21278 if (i < HalfNumElts)
21279 Mask0[i] = M;
21280 else
21281 Mask1[i - HalfNumElts] = M;
21282 }
21283
21284 // Ask the target if this is a valid transform.
21285 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21287 HalfNumElts);
21288 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
21290 return SDValue();
21291
21292 // shuffle (concat X, undef), (concat Y, undef), Mask -->
21293 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
21294 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
21295 SDLoc DL(Shuf);
21298 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
21299}
21300
21301// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
21302// or turn a shuffle of a single concat into simpler shuffle then concat.
21304 EVT VT = N->getValueType(0);
21305 unsigned NumElts = VT.getVectorNumElements();
21306
21307 SDValue N0 = N->getOperand(0);
21308 SDValue N1 = N->getOperand(1);
21310 ArrayRef<int> Mask = SVN->getMask();
21311
21314 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
21316
21317 auto IsUndefMaskElt = [](int i) { return i == -1; };
21318
21319 // Special case: shuffle(concat(A,B)) can be more efficiently represented
21320 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
21321 // half vector elements.
21322 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
21324 IsUndefMaskElt)) {
21325 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
21326 N0.getOperand(1),
21327 Mask.slice(0, NumElemsPerConcat));
21328 N1 = DAG.getUNDEF(ConcatVT);
21329 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
21330 }
21331
21332 // Look at every vector that's inserted. We're looking for exact
21333 // subvector-sized copies from a concatenated vector
21334 for (unsigned I = 0; I != NumConcats; ++I) {
21335 unsigned Begin = I * NumElemsPerConcat;
21336 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
21337
21338 // Make sure we're dealing with a copy.
21340 Ops.push_back(DAG.getUNDEF(ConcatVT));
21341 continue;
21342 }
21343
21344 int OpIdx = -1;
21345 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
21346 if (IsUndefMaskElt(SubMask[i]))
21347 continue;
21348 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
21349 return SDValue();
21351 if (0 <= OpIdx && EltOpIdx != OpIdx)
21352 return SDValue();
21353 OpIdx = EltOpIdx;
21354 }
21355 assert(0 <= OpIdx && "Unknown concat_vectors op");
21356
21357 if (OpIdx < (int)N0.getNumOperands())
21358 Ops.push_back(N0.getOperand(OpIdx));
21359 else
21360 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
21361 }
21362
21363 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21364}
21365
21366// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21367// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21368//
21369// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
21370// a simplification in some sense, but it isn't appropriate in general: some
21371// BUILD_VECTORs are substantially cheaper than others. The general case
21372// of a BUILD_VECTOR requires inserting each element individually (or
21373// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
21374// all constants is a single constant pool load. A BUILD_VECTOR where each
21375// element is identical is a splat. A BUILD_VECTOR where most of the operands
21376// are undef lowers to a small number of element insertions.
21377//
21378// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
21379// We don't fold shuffles where one side is a non-zero constant, and we don't
21380// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
21381// non-constant operands. This seems to work out reasonably well in practice.
21383 SelectionDAG &DAG,
21384 const TargetLowering &TLI) {
21385 EVT VT = SVN->getValueType(0);
21386 unsigned NumElts = VT.getVectorNumElements();
21387 SDValue N0 = SVN->getOperand(0);
21388 SDValue N1 = SVN->getOperand(1);
21389
21390 if (!N0->hasOneUse())
21391 return SDValue();
21392
21393 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
21394 // discussed above.
21395 if (!N1.isUndef()) {
21396 if (!N1->hasOneUse())
21397 return SDValue();
21398
21402 return SDValue();
21403 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
21404 return SDValue();
21405 }
21406
21407 // If both inputs are splats of the same value then we can safely merge this
21408 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
21409 bool IsSplat = false;
21410 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
21412 if (BV0 && BV1)
21413 if (SDValue Splat0 = BV0->getSplatValue())
21414 IsSplat = (Splat0 == BV1->getSplatValue());
21415
21418 for (int M : SVN->getMask()) {
21419 SDValue Op = DAG.getUNDEF(VT.getScalarType());
21420 if (M >= 0) {
21421 int Idx = M < (int)NumElts ? M : M - NumElts;
21422 SDValue &S = (M < (int)NumElts ? N0 : N1);
21423 if (S.getOpcode() == ISD::BUILD_VECTOR) {
21424 Op = S.getOperand(Idx);
21425 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
21426 SDValue Op0 = S.getOperand(0);
21427 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
21428 } else {
21429 // Operand can't be combined - bail out.
21430 return SDValue();
21431 }
21432 }
21433
21434 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
21435 // generating a splat; semantically, this is fine, but it's likely to
21436 // generate low-quality code if the target can't reconstruct an appropriate
21437 // shuffle.
21438 if (!Op.isUndef() && !isIntOrFPConstant(Op))
21439 if (!IsSplat && !DuplicateOps.insert(Op).second)
21440 return SDValue();
21441
21442 Ops.push_back(Op);
21443 }
21444
21445 // BUILD_VECTOR requires all inputs to be of the same type, find the
21446 // maximum type and extend them all.
21447 EVT SVT = VT.getScalarType();
21448 if (SVT.isInteger())
21449 for (SDValue &Op : Ops)
21450 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
21451 if (SVT != VT.getScalarType())
21452 for (SDValue &Op : Ops)
21453 Op = TLI.isZExtFree(Op.getValueType(), SVT)
21454 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
21455 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
21456 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
21457}
21458
21459// Match shuffles that can be converted to any_vector_extend_in_reg.
21460// This is often generated during legalization.
21461// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
21462// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
21464 SelectionDAG &DAG,
21465 const TargetLowering &TLI,
21466 bool LegalOperations) {
21467 EVT VT = SVN->getValueType(0);
21468 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21469
21470 // TODO Add support for big-endian when we have a test case.
21471 if (!VT.isInteger() || IsBigEndian)
21472 return SDValue();
21473
21474 unsigned NumElts = VT.getVectorNumElements();
21475 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21476 ArrayRef<int> Mask = SVN->getMask();
21477 SDValue N0 = SVN->getOperand(0);
21478
21479 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
21480 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
21481 for (unsigned i = 0; i != NumElts; ++i) {
21482 if (Mask[i] < 0)
21483 continue;
21484 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
21485 continue;
21486 return false;
21487 }
21488 return true;
21489 };
21490
21491 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
21492 // power-of-2 extensions as they are the most likely.
21493 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
21494 // Check for non power of 2 vector sizes
21495 if (NumElts % Scale != 0)
21496 continue;
21497 if (!isAnyExtend(Scale))
21498 continue;
21499
21501 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
21502 // Never create an illegal type. Only create unsupported operations if we
21503 // are pre-legalization.
21504 if (TLI.isTypeLegal(OutVT))
21505 if (!LegalOperations ||
21507 return DAG.getBitcast(VT,
21509 SDLoc(SVN), OutVT, N0));
21510 }
21511
21512 return SDValue();
21513}
21514
21515// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
21516// each source element of a large type into the lowest elements of a smaller
21517// destination type. This is often generated during legalization.
21518// If the source node itself was a '*_extend_vector_inreg' node then we should
21519// then be able to remove it.
21521 SelectionDAG &DAG) {
21522 EVT VT = SVN->getValueType(0);
21523 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21524
21525 // TODO Add support for big-endian when we have a test case.
21526 if (!VT.isInteger() || IsBigEndian)
21527 return SDValue();
21528
21529 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
21530
21531 unsigned Opcode = N0.getOpcode();
21532 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
21535 return SDValue();
21536
21537 SDValue N00 = N0.getOperand(0);
21538 ArrayRef<int> Mask = SVN->getMask();
21539 unsigned NumElts = VT.getVectorNumElements();
21540 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21541 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
21543
21545 return SDValue();
21547
21548 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
21549 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
21550 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
21551 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
21552 for (unsigned i = 0; i != NumElts; ++i) {
21553 if (Mask[i] < 0)
21554 continue;
21555 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
21556 continue;
21557 return false;
21558 }
21559 return true;
21560 };
21561
21562 // At the moment we just handle the case where we've truncated back to the
21563 // same size as before the extension.
21564 // TODO: handle more extension/truncation cases as cases arise.
21566 return SDValue();
21567
21568 // We can remove *extend_vector_inreg only if the truncation happens at
21569 // the same scale as the extension.
21570 if (isTruncate(ExtScale))
21571 return DAG.getBitcast(VT, N00);
21572
21573 return SDValue();
21574}
21575
21576// Combine shuffles of splat-shuffles of the form:
21577// shuffle (shuffle V, undef, splat-mask), undef, M
21578// If splat-mask contains undef elements, we need to be careful about
21579// introducing undef's in the folded mask which are not the result of composing
21580// the masks of the shuffles.
21582 SelectionDAG &DAG) {
21583 if (!Shuf->getOperand(1).isUndef())
21584 return SDValue();
21585 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21586 if (!Splat || !Splat->isSplat())
21587 return SDValue();
21588
21589 ArrayRef<int> ShufMask = Shuf->getMask();
21590 ArrayRef<int> SplatMask = Splat->getMask();
21591 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
21592
21593 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
21594 // every undef mask element in the splat-shuffle has a corresponding undef
21595 // element in the user-shuffle's mask or if the composition of mask elements
21596 // would result in undef.
21597 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21598 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
21599 // In this case it is not legal to simplify to the splat-shuffle because we
21600 // may be exposing the users of the shuffle an undef element at index 1
21601 // which was not there before the combine.
21602 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21603 // In this case the composition of masks yields SplatMask, so it's ok to
21604 // simplify to the splat-shuffle.
21605 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21606 // In this case the composed mask includes all undef elements of SplatMask
21607 // and in addition sets element zero to undef. It is safe to simplify to
21608 // the splat-shuffle.
21611 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21612 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21613 SplatMask[UserMask[i]] != -1)
21614 return false;
21615 return true;
21616 };
21618 return Shuf->getOperand(0);
21619
21620 // Create a new shuffle with a mask that is composed of the two shuffles'
21621 // masks.
21623 for (int Idx : ShufMask)
21624 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
21625
21626 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21627 Splat->getOperand(0), Splat->getOperand(1),
21628 NewMask);
21629}
21630
21631/// Combine shuffle of shuffle of the form:
21632/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
21634 SelectionDAG &DAG) {
21635 if (!OuterShuf->getOperand(1).isUndef())
21636 return SDValue();
21637 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21638 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21639 return SDValue();
21640
21641 ArrayRef<int> OuterMask = OuterShuf->getMask();
21642 ArrayRef<int> InnerMask = InnerShuf->getMask();
21643 unsigned NumElts = OuterMask.size();
21644 assert(NumElts == InnerMask.size() && "Mask length mismatch");
21646 int SplatIndex = -1;
21647 for (unsigned i = 0; i != NumElts; ++i) {
21648 // Undef lanes remain undef.
21649 int OuterMaskElt = OuterMask[i];
21650 if (OuterMaskElt == -1)
21651 continue;
21652
21653 // Peek through the shuffle masks to get the underlying source element.
21655 if (InnerMaskElt == -1)
21656 continue;
21657
21658 // Initialize the splatted element.
21659 if (SplatIndex == -1)
21660 SplatIndex = InnerMaskElt;
21661
21662 // Non-matching index - this is not a splat.
21663 if (SplatIndex != InnerMaskElt)
21664 return SDValue();
21665
21667 }
21668 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21669 getSplatIndex(CombinedMask) != -1) &&
21670 "Expected a splat mask");
21671
21672 // TODO: The transform may be a win even if the mask is not legal.
21673 EVT VT = OuterShuf->getValueType(0);
21674 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21676 return SDValue();
21677
21678 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21679 InnerShuf->getOperand(1), CombinedMask);
21680}
21681
21682/// If the shuffle mask is taking exactly one element from the first vector
21683/// operand and passing through all other elements from the second vector
21684/// operand, return the index of the mask element that is choosing an element
21685/// from the first operand. Otherwise, return -1.
21687 int MaskSize = Mask.size();
21688 int EltFromOp0 = -1;
21689 // TODO: This does not match if there are undef elements in the shuffle mask.
21690 // Should we ignore undefs in the shuffle mask instead? The trade-off is
21691 // removing an instruction (a shuffle), but losing the knowledge that some
21692 // vector lanes are not needed.
21693 for (int i = 0; i != MaskSize; ++i) {
21694 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21695 // We're looking for a shuffle of exactly one element from operand 0.
21696 if (EltFromOp0 != -1)
21697 return -1;
21698 EltFromOp0 = i;
21699 } else if (Mask[i] != i + MaskSize) {
21700 // Nothing from operand 1 can change lanes.
21701 return -1;
21702 }
21703 }
21704 return EltFromOp0;
21705}
21706
21707/// If a shuffle inserts exactly one element from a source vector operand into
21708/// another vector operand and we can access the specified element as a scalar,
21709/// then we can eliminate the shuffle.
21711 SelectionDAG &DAG) {
21712 // First, check if we are taking one element of a vector and shuffling that
21713 // element into another vector.
21714 ArrayRef<int> Mask = Shuf->getMask();
21715 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21716 SDValue Op0 = Shuf->getOperand(0);
21717 SDValue Op1 = Shuf->getOperand(1);
21719 if (ShufOp0Index == -1) {
21720 // Commute mask and check again.
21723 if (ShufOp0Index == -1)
21724 return SDValue();
21725 // Commute operands to match the commuted shuffle mask.
21726 std::swap(Op0, Op1);
21727 Mask = CommutedMask;
21728 }
21729
21730 // The shuffle inserts exactly one element from operand 0 into operand 1.
21731 // Now see if we can access that element as a scalar via a real insert element
21732 // instruction.
21733 // TODO: We can try harder to locate the element as a scalar. Examples: it
21734 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21735 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21736 "Shuffle mask value must be from operand 0");
21737 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21738 return SDValue();
21739
21741 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21742 return SDValue();
21743
21744 // There's an existing insertelement with constant insertion index, so we
21745 // don't need to check the legality/profitability of a replacement operation
21746 // that differs at most in the constant value. The target should be able to
21747 // lower any of those in a similar way. If not, legalization will expand this
21748 // to a scalar-to-vector plus shuffle.
21749 //
21750 // Note that the shuffle may move the scalar from the position that the insert
21751 // element used. Therefore, our new insert element occurs at the shuffle's
21752 // mask index value, not the insert's index value.
21753 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21756 Op1, Op0.getOperand(1), NewInsIndex);
21757}
21758
21759/// If we have a unary shuffle of a shuffle, see if it can be folded away
21760/// completely. This has the potential to lose undef knowledge because the first
21761/// shuffle may not have an undef mask element where the second one does. So
21762/// only call this after doing simplifications based on demanded elements.
21764 // shuf (shuf0 X, Y, Mask0), undef, Mask
21765 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21766 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21767 return SDValue();
21768
21769 ArrayRef<int> Mask = Shuf->getMask();
21770 ArrayRef<int> Mask0 = Shuf0->getMask();
21771 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21772 // Ignore undef elements.
21773 if (Mask[i] == -1)
21774 continue;
21775 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
21776
21777 // Is the element of the shuffle operand chosen by this shuffle the same as
21778 // the element chosen by the shuffle operand itself?
21779 if (Mask0[Mask[i]] != Mask0[i])
21780 return SDValue();
21781 }
21782 // Every element of this shuffle is identical to the result of the previous
21783 // shuffle, so we can replace this value.
21784 return Shuf->getOperand(0);
21785}
21786
21787SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
21788 EVT VT = N->getValueType(0);
21789 unsigned NumElts = VT.getVectorNumElements();
21790
21791 SDValue N0 = N->getOperand(0);
21792 SDValue N1 = N->getOperand(1);
21793
21794 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
21795
21796 // Canonicalize shuffle undef, undef -> undef
21797 if (N0.isUndef() && N1.isUndef())
21798 return DAG.getUNDEF(VT);
21799
21801
21802 // Canonicalize shuffle v, v -> v, undef
21803 if (N0 == N1)
21804 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
21805 createUnaryMask(SVN->getMask(), NumElts));
21806
21807 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
21808 if (N0.isUndef())
21809 return DAG.getCommutedVectorShuffle(*SVN);
21810
21811 // Remove references to rhs if it is undef
21812 if (N1.isUndef()) {
21813 bool Changed = false;
21815 for (unsigned i = 0; i != NumElts; ++i) {
21816 int Idx = SVN->getMaskElt(i);
21817 if (Idx >= (int)NumElts) {
21818 Idx = -1;
21819 Changed = true;
21820 }
21821 NewMask.push_back(Idx);
21822 }
21823 if (Changed)
21824 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21825 }
21826
21828 return InsElt;
21829
21830 // A shuffle of a single vector that is a splatted value can always be folded.
21831 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21832 return V;
21833
21834 if (SDValue V = formSplatFromShuffles(SVN, DAG))
21835 return V;
21836
21837 // If it is a splat, check if the argument vector is another splat or a
21838 // build_vector.
21839 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21840 int SplatIndex = SVN->getSplatIndex();
21841 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21842 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21843 // splat (vector_bo L, R), Index -->
21844 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
21845 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21846 SDLoc DL(N);
21847 EVT EltVT = VT.getScalarType();
21848 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21851 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21852 N0.getNode()->getFlags());
21855 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21856 }
21857
21858 // If this is a bit convert that changes the element type of the vector but
21859 // not the number of vector elements, look through it. Be careful not to
21860 // look though conversions that change things like v4f32 to v2f64.
21861 SDNode *V = N0.getNode();
21862 if (V->getOpcode() == ISD::BITCAST) {
21863 SDValue ConvInput = V->getOperand(0);
21864 if (ConvInput.getValueType().isVector() &&
21865 ConvInput.getValueType().getVectorNumElements() == NumElts)
21866 V = ConvInput.getNode();
21867 }
21868
21869 if (V->getOpcode() == ISD::BUILD_VECTOR) {
21870 assert(V->getNumOperands() == NumElts &&
21871 "BUILD_VECTOR has wrong number of operands");
21872 SDValue Base;
21873 bool AllSame = true;
21874 for (unsigned i = 0; i != NumElts; ++i) {
21875 if (!V->getOperand(i).isUndef()) {
21876 Base = V->getOperand(i);
21877 break;
21878 }
21879 }
21880 // Splat of <u, u, u, u>, return <u, u, u, u>
21881 if (!Base.getNode())
21882 return N0;
21883 for (unsigned i = 0; i != NumElts; ++i) {
21884 if (V->getOperand(i) != Base) {
21885 AllSame = false;
21886 break;
21887 }
21888 }
21889 // Splat of <x, x, x, x>, return <x, x, x, x>
21890 if (AllSame)
21891 return N0;
21892
21893 // Canonicalize any other splat as a build_vector.
21894 SDValue Splatted = V->getOperand(SplatIndex);
21896 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21897
21898 // We may have jumped through bitcasts, so the type of the
21899 // BUILD_VECTOR may not match the type of the shuffle.
21900 if (V->getValueType(0) != VT)
21901 NewBV = DAG.getBitcast(VT, NewBV);
21902 return NewBV;
21903 }
21904 }
21905
21906 // Simplify source operands based on shuffle mask.
21908 return SDValue(N, 0);
21909
21910 // This is intentionally placed after demanded elements simplification because
21911 // it could eliminate knowledge of undef elements created by this shuffle.
21913 return ShufOp;
21914
21915 // Match shuffles that can be converted to any_vector_extend_in_reg.
21916 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21917 return V;
21918
21919 // Combine "truncate_vector_in_reg" style shuffles.
21920 if (SDValue V = combineTruncationShuffle(SVN, DAG))
21921 return V;
21922
21923 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21924 Level < AfterLegalizeVectorOps &&
21925 (N1.isUndef() ||
21926 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21927 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21928 if (SDValue V = partitionShuffleOfConcats(N, DAG))
21929 return V;
21930 }
21931
21932 // A shuffle of a concat of the same narrow vector can be reduced to use
21933 // only low-half elements of a concat with undef:
21934 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
21935 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21936 N0.getNumOperands() == 2 &&
21937 N0.getOperand(0) == N0.getOperand(1)) {
21938 int HalfNumElts = (int)NumElts / 2;
21940 for (unsigned i = 0; i != NumElts; ++i) {
21941 int Idx = SVN->getMaskElt(i);
21942 if (Idx >= HalfNumElts) {
21943 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21944 Idx -= HalfNumElts;
21945 }
21946 NewMask.push_back(Idx);
21947 }
21948 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21951 N0.getOperand(0), UndefVec);
21952 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21953 }
21954 }
21955
21956 // See if we can replace a shuffle with an insert_subvector.
21957 // e.g. v2i32 into v8i32:
21958 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
21959 // --> insert_subvector(lhs,rhs1,4).
21960 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
21963 // Ensure RHS subvectors are legal.
21964 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
21965 EVT SubVT = RHS.getOperand(0).getValueType();
21966 int NumSubVecs = RHS.getNumOperands();
21967 int NumSubElts = SubVT.getVectorNumElements();
21968 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
21969 if (!TLI.isTypeLegal(SubVT))
21970 return SDValue();
21971
21972 // Don't bother if we have an unary shuffle (matches undef + LHS elts).
21973 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
21974 return SDValue();
21975
21976 // Search [NumSubElts] spans for RHS sequence.
21977 // TODO: Can we avoid nested loops to increase performance?
21979 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
21980 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
21981 // Reset mask to identity.
21982 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
21983
21984 // Add subvector insertion.
21985 std::iota(InsertionMask.begin() + SubIdx,
21986 InsertionMask.begin() + SubIdx + NumSubElts,
21987 NumElts + (SubVec * NumSubElts));
21988
21989 // See if the shuffle mask matches the reference insertion mask.
21990 bool MatchingShuffle = true;
21991 for (int i = 0; i != (int)NumElts; ++i) {
21992 int ExpectIdx = InsertionMask[i];
21993 int ActualIdx = Mask[i];
21994 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
21995 MatchingShuffle = false;
21996 break;
21997 }
21998 }
21999
22000 if (MatchingShuffle)
22001 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
22002 RHS.getOperand(SubVec),
22003 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
22004 }
22005 }
22006 return SDValue();
22007 };
22008 ArrayRef<int> Mask = SVN->getMask();
22009 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
22010 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
22011 return InsertN1;
22012 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
22013 SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
22016 return InsertN0;
22017 }
22018 }
22019
22020 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
22021 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
22022 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
22023 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
22024 return Res;
22025
22026 // If this shuffle only has a single input that is a bitcasted shuffle,
22027 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
22028 // back to their original types.
22029 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
22030 N1.isUndef() && Level < AfterLegalizeVectorOps &&
22031 TLI.isTypeLegal(VT)) {
22032
22034 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
22035 EVT SVT = VT.getScalarType();
22036 EVT InnerVT = BC0->getValueType(0);
22037 EVT InnerSVT = InnerVT.getScalarType();
22038
22039 // Determine which shuffle works with the smaller scalar type.
22040 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
22041 EVT ScaleSVT = ScaleVT.getScalarType();
22042
22043 if (TLI.isTypeLegal(ScaleVT) &&
22044 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
22045 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
22046 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22047 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22048
22049 // Scale the shuffle masks to the smaller scalar type.
22055
22056 // Merge the shuffle masks.
22058 for (int M : OuterMask)
22059 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
22060
22061 // Test for shuffle mask legality over both commutations.
22062 SDValue SV0 = BC0->getOperand(0);
22063 SDValue SV1 = BC0->getOperand(1);
22065 if (!LegalMask) {
22066 std::swap(SV0, SV1);
22069 }
22070
22071 if (LegalMask) {
22072 SV0 = DAG.getBitcast(ScaleVT, SV0);
22073 SV1 = DAG.getBitcast(ScaleVT, SV1);
22074 return DAG.getBitcast(
22075 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
22076 }
22077 }
22078 }
22079 }
22080
22081 // Compute the combined shuffle mask for a shuffle with SV0 as the first
22082 // operand, and SV1 as the second operand.
22083 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
22084 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
22085 auto MergeInnerShuffle =
22086 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
22088 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
22089 SmallVectorImpl<int> &Mask) -> bool {
22090 // Don't try to fold splats; they're likely to simplify somehow, or they
22091 // might be free.
22092 if (OtherSVN->isSplat())
22093 return false;
22094
22095 SV0 = SV1 = SDValue();
22096 Mask.clear();
22097
22098 for (unsigned i = 0; i != NumElts; ++i) {
22099 int Idx = SVN->getMaskElt(i);
22100 if (Idx < 0) {
22101 // Propagate Undef.
22102 Mask.push_back(Idx);
22103 continue;
22104 }
22105
22106 if (Commute)
22107 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
22108
22110 if (Idx < (int)NumElts) {
22111 // This shuffle index refers to the inner shuffle N0. Lookup the inner
22112 // shuffle mask to identify which vector is actually referenced.
22113 Idx = OtherSVN->getMaskElt(Idx);
22114 if (Idx < 0) {
22115 // Propagate Undef.
22116 Mask.push_back(Idx);
22117 continue;
22118 }
22119 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
22120 : OtherSVN->getOperand(1);
22121 } else {
22122 // This shuffle index references an element within N1.
22123 CurrentVec = N1;
22124 }
22125
22126 // Simple case where 'CurrentVec' is UNDEF.
22127 if (CurrentVec.isUndef()) {
22128 Mask.push_back(-1);
22129 continue;
22130 }
22131
22132 // Canonicalize the shuffle index. We don't know yet if CurrentVec
22133 // will be the first or second operand of the combined shuffle.
22134 Idx = Idx % NumElts;
22135 if (!SV0.getNode() || SV0 == CurrentVec) {
22136 // Ok. CurrentVec is the left hand side.
22137 // Update the mask accordingly.
22138 SV0 = CurrentVec;
22139 Mask.push_back(Idx);
22140 continue;
22141 }
22142 if (!SV1.getNode() || SV1 == CurrentVec) {
22143 // Ok. CurrentVec is the right hand side.
22144 // Update the mask accordingly.
22145 SV1 = CurrentVec;
22146 Mask.push_back(Idx + NumElts);
22147 continue;
22148 }
22149
22150 // Last chance - see if the vector is another shuffle and if it
22151 // uses one of the existing candidate shuffle ops.
22153 int InnerIdx = CurrentSVN->getMaskElt(Idx);
22154 if (InnerIdx < 0) {
22155 Mask.push_back(-1);
22156 continue;
22157 }
22158 SDValue InnerVec = (InnerIdx < (int)NumElts)
22159 ? CurrentSVN->getOperand(0)
22160 : CurrentSVN->getOperand(1);
22161 if (InnerVec.isUndef()) {
22162 Mask.push_back(-1);
22163 continue;
22164 }
22165 InnerIdx %= NumElts;
22166 if (InnerVec == SV0) {
22167 Mask.push_back(InnerIdx);
22168 continue;
22169 }
22170 if (InnerVec == SV1) {
22171 Mask.push_back(InnerIdx + NumElts);
22172 continue;
22173 }
22174 }
22175
22176 // Bail out if we cannot convert the shuffle pair into a single shuffle.
22177 return false;
22178 }
22179
22180 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22181 return true;
22182
22183 // Avoid introducing shuffles with illegal mask.
22184 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22185 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22186 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22187 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
22188 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
22189 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
22190 if (TLI.isShuffleMaskLegal(Mask, VT))
22191 return true;
22192
22193 std::swap(SV0, SV1);
22195 return TLI.isShuffleMaskLegal(Mask, VT);
22196 };
22197
22198 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
22199 // Canonicalize shuffles according to rules:
22200 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
22201 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
22202 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
22203 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22205 // The incoming shuffle must be of the same type as the result of the
22206 // current shuffle.
22207 assert(N1->getOperand(0).getValueType() == VT &&
22208 "Shuffle types don't match");
22209
22210 SDValue SV0 = N1->getOperand(0);
22211 SDValue SV1 = N1->getOperand(1);
22212 bool HasSameOp0 = N0 == SV0;
22213 bool IsSV1Undef = SV1.isUndef();
22214 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
22215 // Commute the operands of this shuffle so merging below will trigger.
22216 return DAG.getCommutedVectorShuffle(*SVN);
22217 }
22218
22219 // Canonicalize splat shuffles to the RHS to improve merging below.
22220 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
22221 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
22222 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22223 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
22224 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
22225 return DAG.getCommutedVectorShuffle(*SVN);
22226 }
22227
22228 // Try to fold according to rules:
22229 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22230 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22231 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22232 // Don't try to fold shuffles with illegal type.
22233 // Only fold if this shuffle is the only user of the other shuffle.
22234 // Try matching shuffle(C,shuffle(A,B)) commutted patterns as well.
22235 for (int i = 0; i != 2; ++i) {
22236 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
22237 N->isOnlyUserOf(N->getOperand(i).getNode())) {
22238 // The incoming shuffle must be of the same type as the result of the
22239 // current shuffle.
22240 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
22241 assert(OtherSV->getOperand(0).getValueType() == VT &&
22242 "Shuffle types don't match");
22243
22244 SDValue SV0, SV1;
22246 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
22247 SV0, SV1, Mask)) {
22248 // Check if all indices in Mask are Undef. In case, propagate Undef.
22249 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22250 return DAG.getUNDEF(VT);
22251
22252 return DAG.getVectorShuffle(VT, SDLoc(N),
22253 SV0 ? SV0 : DAG.getUNDEF(VT),
22254 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
22255 }
22256 }
22257 }
22258
22259 // Merge shuffles through binops if we are able to merge it with at least
22260 // one other shuffles.
22261 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
22262 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
22263 unsigned SrcOpcode = N0.getOpcode();
22264 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
22265 (N1.isUndef() ||
22266 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
22267 // Get binop source ops, or just pass on the undef.
22268 SDValue Op00 = N0.getOperand(0);
22269 SDValue Op01 = N0.getOperand(1);
22270 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
22271 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
22272 // TODO: We might be able to relax the VT check but we don't currently
22273 // have any isBinOp() that has different result/ops VTs so play safe until
22274 // we have test coverage.
22275 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
22276 Op01.getValueType() == VT && Op11.getValueType() == VT &&
22277 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
22278 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
22279 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
22280 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
22283 bool Commute) {
22284 SDValue InnerN = Commute ? N1 : N0;
22285 SDValue Op0 = LeftOp ? Op00 : Op01;
22286 SDValue Op1 = LeftOp ? Op10 : Op11;
22287 if (Commute)
22288 std::swap(Op0, Op1);
22289 // Only accept the merged shuffle if we don't introduce undef elements,
22290 // or the inner shuffle already contained undef elements.
22292 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
22293 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
22294 Mask) &&
22295 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
22296 llvm::none_of(Mask, [](int M) { return M < 0; }));
22297 };
22298
22299 // Ensure we don't increase the number of shuffles - we must merge a
22300 // shuffle from at least one of the LHS and RHS ops.
22301 bool MergedLeft = false;
22304 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
22306 MergedLeft = true;
22307 } else {
22308 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22309 LeftSV0 = Op00, LeftSV1 = Op10;
22310 }
22311
22312 bool MergedRight = false;
22315 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
22317 MergedRight = true;
22318 } else {
22319 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22321 }
22322
22323 if (MergedLeft || MergedRight) {
22324 SDLoc DL(N);
22325 SDValue LHS = DAG.getVectorShuffle(
22326 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
22327 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
22328 SDValue RHS = DAG.getVectorShuffle(
22329 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
22330 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
22331 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
22332 }
22333 }
22334 }
22335 }
22336
22338 return V;
22339
22340 return SDValue();
22341}
22342
22343SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
22344  SDValue InVal = N->getOperand(0);
22345  EVT VT = N->getValueType(0);
22346
22347  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
22348  // with a VECTOR_SHUFFLE and possible truncate.
22349  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22350      VT.isFixedLengthVector() &&
22351      InVal->getOperand(0).getValueType().isFixedLengthVector()) {
22352    SDValue InVec = InVal->getOperand(0);
22353    SDValue EltNo = InVal->getOperand(1);
22354    auto InVecT = InVec.getValueType();
      // Mask that moves the extracted element into lane 0; every other lane
      // is undef (-1).
22356      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
22357      int Elt = C0->getZExtValue();
22358      NewMask[0] = Elt;
22359      // If we have an implicit truncate, do the truncate here as long as the
22360      // truncated scalar type is legal; then retry as SCALAR_TO_VECTOR of the
      // narrowed value instead of forming a shuffle.
22361      if (VT.getScalarType() != InVal.getValueType() &&
22362          InVal.getValueType().isScalarInteger() &&
22363          isTypeLegal(VT.getScalarType())) {
22364        SDValue Val =
22366        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
22367      }
22368      if (VT.getScalarType() == InVecT.getScalarType() &&
22369          VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
      // Ask the target for a legal shuffle of InVec with undef using the
      // single-element mask built above.
22371            TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
22372                                        DAG.getUNDEF(InVecT), NewMask, DAG);
22373        if (LegalShuffle) {
22374          // If the initial vector is the correct size this shuffle is a
22375          // valid result.
22376          if (VT == InVecT)
22377            return LegalShuffle;
22378          // If not we must truncate the vector.
22379          if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
22382                               InVecT.getVectorElementType(),
22386          }
22387        }
22388      }
22389    }
22390  }
22391
22392  return SDValue();
22393}
22394
22395SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
22396  EVT VT = N->getValueType(0);
22397  SDValue N0 = N->getOperand(0);
22398  SDValue N1 = N->getOperand(1);
22399  SDValue N2 = N->getOperand(2);
22400  uint64_t InsIdx = N->getConstantOperandVal(2);
22401
22402  // If inserting an UNDEF, just return the original vector.
22403  if (N1.isUndef())
22404    return N0;
22405
22406  // If this is an insert of an extracted vector into an undef vector, we can
22407  // just use the input to the extract.
22408  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22409      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
22410    return N1.getOperand(0);
22411
22412  // If we are inserting a bitcast value into an undef, with the same
22413  // number of elements, just use the bitcast input of the extract.
22414  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
22415  // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
22416  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
22417      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22418      N1.getOperand(0).getOperand(1) == N2 &&
22419      N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
22420          VT.getVectorElementCount() &&
22421      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
22422          VT.getSizeInBits()) {
22423    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
22424  }
22425
22426  // If both N0 and N1 are bitcast values on which insert_subvector
22427  // would make sense, pull the bitcast through.
22428  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
22429  // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
22430  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
22431    SDValue CN0 = N0.getOperand(0);
22432    SDValue CN1 = N1.getOperand(0);
22433    EVT CN0VT = CN0.getValueType();
22434    EVT CN1VT = CN1.getValueType();
22435    if (CN0VT.isVector() && CN1VT.isVector() &&
22436        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
22437        CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
22439                                      CN0.getValueType(), CN0, CN1, N2);
22440      return DAG.getBitcast(VT, NewINSERT);
22441    }
22442  }
22443
22444  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
22445  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
22446  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
22447  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
22448      N0.getOperand(1).getValueType() == N1.getValueType() &&
22449      N0.getOperand(2) == N2)
22450    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
22451                       N1, N2);
22452
22453  // Eliminate an intermediate insert into an undef vector:
22454  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
22455  // insert_subvector undef, X, N2
22456  if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
22457      N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
22458    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
22459                       N1.getOperand(1), N2);
22460
22461  // Push subvector bitcasts to the output, adjusting the index as we go.
22462  // insert_subvector(bitcast(v), bitcast(s), c1)
22463  // -> bitcast(insert_subvector(v, s, c2))
22464  if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
22465      N1.getOpcode() == ISD::BITCAST) {
22468    EVT N0SrcSVT = N0Src.getValueType().getScalarType();
22469    EVT N1SrcSVT = N1Src.getValueType().getScalarType();
22470    if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
22471        N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
22472      EVT NewVT;
22473      SDLoc DL(N);
22474      SDValue NewIdx;
22475      LLVMContext &Ctx = *DAG.getContext();
22477      unsigned EltSizeInBits = VT.getScalarSizeInBits();
      // Rescale the insertion index to the pre-bitcast element size: widen
      // the index when the source elements are narrower, narrow it (only when
      // it stays exactly divisible) when they are wider.
22478      if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
22479        unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
22480        NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
22481        NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
22482      } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
22483        unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
22484        if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
22486                                   NumElts.divideCoefficientBy(Scale));
22487          NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
22488        }
22489      }
22490      if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
22491        SDValue Res = DAG.getBitcast(NewVT, N0Src);
22492        Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
22493        return DAG.getBitcast(VT, Res);
22494      }
22495    }
22496  }
22497
22498  // Canonicalize insert_subvector dag nodes.
22499  // Example:
22500  // (insert_subvector (insert_subvector A, Idx0), Idx1)
22501  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
22502  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
22503      N1.getValueType() == N0.getOperand(1).getValueType()) {
22504    unsigned OtherIdx = N0.getConstantOperandVal(2);
22505    if (InsIdx < OtherIdx) {
22506      // Swap nodes.
22508                                  N0.getOperand(0), N1, N2);
22509      AddToWorklist(NewOp.getNode());
22510      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
22511                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
22512    }
22513  }
22514
22515  // If the input vector is a concatenation, and the insert replaces
22516  // one of the pieces, we can optimize into a single concat_vectors.
22517  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
22518      N0.getOperand(0).getValueType() == N1.getValueType() &&
22520          N1.getValueType().isScalableVector()) {
22521    unsigned Factor = N1.getValueType().getVectorMinNumElements();
22522    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
22523    Ops[InsIdx / Factor] = N1;
22524    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
22525  }
22526
22527  // Simplify source operands based on insertion.
22529    return SDValue(N, 0);
22530
22531  return SDValue();
22532}
22533
22534SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
22535 SDValue N0 = N->getOperand(0);
22536
22537 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
22538 if (N0->getOpcode() == ISD::FP16_TO_FP)
22539 return N0->getOperand(0);
22540
22541 return SDValue();
22542}
22543
22544SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
22545  SDValue N0 = N->getOperand(0);
22546
22547  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
  // The mask is redundant here because only the low 16 bits of the operand
  // feed the conversion; the target hook lets back ends that rely on the
  // explicit zero-extension keep it.
22548  if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
22550    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
22551      return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
22552                         N0.getOperand(0));
22553    }
22554  }
22555
22556  return SDValue();
22557}
22558
22559SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
22560  SDValue N0 = N->getOperand(0);
22561  EVT VT = N0.getValueType();
22562  unsigned Opcode = N->getOpcode();
22563
22564  // VECREDUCE over 1-element vector is just an extract.
22565  if (VT.getVectorElementCount().isScalar()) {
22566    SDLoc dl(N);
22567    SDValue Res =
22569                    DAG.getVectorIdxConstant(0, dl));
    // The reduction may produce a wider scalar than the vector element;
    // any-extend to the node's declared result type if they differ.
22570    if (Res.getValueType() != N->getValueType(0))
22571      Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
22572    return Res;
22573  }
22574
22575  // On a boolean vector an and/or reduction is the same as a umin/umax
22576  // reduction. Convert them if the latter is legal while the former isn't.
22577  if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
22578    unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
22580    if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
22581        TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
22583      return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
22584  }
22585
22586  return SDValue();
22587}
22588
22589SDValue DAGCombiner::visitVPOp(SDNode *N) {
22590  // VP operations in which all vector elements are disabled - either by
22591  // determining that the mask is all false or that the EVL is 0 - can be
22592  // eliminated.
22593  bool AreAllEltsDisabled = false;
22594  if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
22595    AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
22596  if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
22598        ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
22599
22600  // This is the only generic VP combine we support for now.
22601  if (!AreAllEltsDisabled)
22602    return SDValue();
22603
22604  // Binary operations can be replaced by UNDEF.
22605  if (ISD::isVPBinaryOp(N->getOpcode()))
22606    return DAG.getUNDEF(N->getValueType(0));
22607
22608  // VP Memory operations can be replaced by either the chain (stores) or the
22609  // chain + undef (loads).
22610  if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
22611    if (MemSD->writeMem())
22612      return MemSD->getChain();
    // Loads: replace the value result with undef but preserve the chain so
    // ordering with other memory operations is kept intact.
22613    return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
22614  }
22615
22616  // Reduction operations return the start operand when no elements are active.
22617  if (ISD::isVPReduction(N->getOpcode()))
22618    return N->getOperand(0);
22619
22620  return SDValue();
22621}
22622
22623/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
22624/// with the destination vector and a zero vector.
22625/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
22626/// vector_shuffle V, Zero, <0, 4, 2, 4>
22627SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22628  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22629
22630  EVT VT = N->getValueType(0);
22631  SDValue LHS = N->getOperand(0);
22632  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22633  SDLoc DL(N);
22634
22635  // Make sure we're not running after operation legalization where it
22636  // may have custom lowered the vector shuffles.
22637  if (LegalOperations)
22638    return SDValue();
22639
22640  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22641    return SDValue();
22642
22643  EVT RVT = RHS.getValueType();
22644  unsigned NumElts = RHS.getNumOperands();
22645
22646  // Attempt to create a valid clear mask, splitting the mask into
22647  // sub elements and checking to see if each is
22648  // all zeros or all ones - suitable for shuffle masking.
  // Shuffle indices [0, NumSubElts) pick lanes of LHS (kept lanes);
  // indices [NumSubElts, 2*NumSubElts) pick lanes of the zero vector.
22649  auto BuildClearMask = [&](int Split) {
22650    int NumSubElts = NumElts * Split;
22651    int NumSubBits = RVT.getScalarSizeInBits() / Split;
22652
22653    SmallVector<int, 8> Indices;
22654    for (int i = 0; i != NumSubElts; ++i) {
22655      int EltIdx = i / Split;
22656      int SubIdx = i % Split;
22657      SDValue Elt = RHS.getOperand(EltIdx);
22658      // X & undef --> 0 (not undef). So this lane must be converted to choose
22659      // from the zero constant vector (same as if the element had all 0-bits).
22660      if (Elt.isUndef()) {
22661        Indices.push_back(i + NumSubElts);
22662        continue;
22663      }
22664
22665      APInt Bits;
22667        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
22668      else if (isa<ConstantFPSDNode>(Elt))
22669        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
22670      else
22671        return SDValue();
22672
22673      // Extract the sub element from the constant bit mask.
22674      if (DAG.getDataLayout().isBigEndian())
22675        Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
22676      else
22677        Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
22678
      // A sub-element must be entirely ones (keep LHS) or entirely zeros
      // (take from the zero vector); anything mixed defeats the transform.
22679      if (Bits.isAllOnes())
22680        Indices.push_back(i);
22681      else if (Bits == 0)
22682        Indices.push_back(i + NumSubElts);
22683      else
22684        return SDValue();
22685    }
22686
22687    // Let's see if the target supports this vector_shuffle.
22690    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
22691      return SDValue();
22692
22693    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
22694    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
22695                                                   DAG.getBitcast(ClearVT, LHS),
22696                                                   Zero, Indices));
22697  };
22698
22699  // Determine maximum split level (byte level masking).
22700  int MaxSplit = 1;
22701  if (RVT.getScalarSizeInBits() % 8 == 0)
22702    MaxSplit = RVT.getScalarSizeInBits() / 8;
22703
  // Try progressively finer sub-element granularities until one yields a
  // legal clear mask.
22704  for (int Split = 1; Split <= MaxSplit; ++Split)
22705    if (RVT.getScalarSizeInBits() % Split == 0)
22706      if (SDValue S = BuildClearMask(Split))
22707        return S;
22708
22709  return SDValue();
22710}
22711
22712/// If a vector binop is performed on splat values, it may be profitable to
22713/// extract, scalarize, and insert/splat.
22714/// Requires both operands to be splats of the same source lane, cheap
22714/// element extraction, and a legal/custom scalar form of the operation.
22715                                      const SDLoc &DL) {
22716  SDValue N0 = N->getOperand(0);
22717  SDValue N1 = N->getOperand(1);
22718  unsigned Opcode = N->getOpcode();
22719  EVT VT = N->getValueType(0);
22721  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22722
22723  // TODO: Remove/replace the extract cost check? If the elements are available
22724  // as scalars, then there may be no extract cost. Should we ask if
22725  // inserting a scalar back into a vector is cheap instead?
22726  int Index0, Index1;
22727  SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
22729  if (!Src0 || !Src1 || Index0 != Index1 ||
22732      !TLI.isExtractVecEltCheap(VT, Index0) ||
22733      !TLI.isOperationLegalOrCustom(Opcode, EltVT))
22734    return SDValue();
22735
  // Perform the operation once on the extracted scalars, preserving flags.
22739  SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
22740
22741  // If all lanes but 1 are undefined, no need to splat the scalar result.
22742  // TODO: Keep track of undefs and use that info in the general case.
22743  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
22744      count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
22745      count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
22746    // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
22747    // build_vec ..undef, (bo X, Y), undef...
22749    Ops[Index0] = ScalarBO;
22750    return DAG.getBuildVector(VT, DL, Ops);
22751  }
22752
22753  // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
22755  return DAG.getBuildVector(VT, DL, Ops);
22756}
22757
22758/// Visit a binary vector operation, like ADD.
22759SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
22760  EVT VT = N->getValueType(0);
22761  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
22762
22763  SDValue LHS = N->getOperand(0);
22764  SDValue RHS = N->getOperand(1);
22765  unsigned Opcode = N->getOpcode();
22766  SDNodeFlags Flags = N->getFlags();
22767
22768  // Move unary shuffles with identical masks after a vector binop:
22769  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
22770  // --> shuffle (VBinOp A, B), Undef, Mask
22771  // This does not require type legality checks because we are creating the
22772  // same types of operations that are in the original sequence. We do have to
22773  // restrict ops like integer div that have immediate UB (eg, div-by-zero)
22774  // though. This code is adapted from the identical transform in instcombine.
22775  if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
22776      Opcode != ISD::UREM && Opcode != ISD::SREM &&
22777      Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
22780    if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
22781        LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
22782        (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
22783      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
22784                                     RHS.getOperand(0), Flags);
22785      SDValue UndefV = LHS.getOperand(1);
22786      return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
22787    }
22788
22789    // Try to sink a splat shuffle after a binop with a uniform constant.
22790    // This is limited to cases where neither the shuffle nor the constant have
22791    // undefined elements because that could be poison-unsafe or inhibit
22792    // demanded elements analysis. It is further limited to not change a splat
22793    // of an inserted scalar because that may be optimized better by
22794    // load-folding or other target-specific behaviors.
22795    if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
22796        Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
22797        Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22798      // binop (splat X), (splat C) --> splat (binop X, C)
22799      SDValue X = Shuf0->getOperand(0);
22800      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
22801      return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22802                                  Shuf0->getMask());
22803    }
    // Mirror image of the case above with the splat shuffle on the LHS.
22804    if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
22805        Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
22806        Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22807      // binop (splat C), (splat X) --> splat (binop C, X)
22808      SDValue X = Shuf1->getOperand(0);
22809      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22810      return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22811                                  Shuf1->getMask());
22812    }
22813  }
22814
22815  // The following pattern is likely to emerge with vector reduction ops. Moving
22816  // the binary operation ahead of insertion may allow using a narrower vector
22817  // instruction that has better performance than the wide version of the op:
22818  // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
22819  if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22820      RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22821      LHS.getOperand(2) == RHS.getOperand(2) &&
22822      (LHS.hasOneUse() || RHS.hasOneUse())) {
22823    SDValue X = LHS.getOperand(1);
22824    SDValue Y = RHS.getOperand(1);
22825    SDValue Z = LHS.getOperand(2);
22826    EVT NarrowVT = X.getValueType();
22827    if (NarrowVT == Y.getValueType() &&
22828        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22829                                              LegalOperations)) {
22830      // (binop undef, undef) may not return undef, so compute that result.
22831      SDValue VecC =
22832          DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
22833      SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22834      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22835    }
22836  }
22837
22838  // Make sure all but the first op are undef or constant.
22840    return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22841           all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22842             return Op.isUndef() ||
22843                    ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22844           });
22845  };
22846
22847  // The following pattern is likely to emerge with vector reduction ops. Moving
22848  // the binary operation ahead of the concat may allow using a narrower vector
22849  // instruction that has better performance than the wide version of the op:
22850  // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22851  // concat (VBinOp X, Y), VecC
22853      (LHS.hasOneUse() || RHS.hasOneUse())) {
22854    EVT NarrowVT = LHS.getOperand(0).getValueType();
22855    if (NarrowVT == RHS.getOperand(0).getValueType() &&
22856        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22857      unsigned NumOperands = LHS.getNumOperands();
22859      for (unsigned i = 0; i != NumOperands; ++i) {
22860        // This constant fold for operands 1 and up.
22861        ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22862                                        RHS.getOperand(i)));
22863      }
22864
22865      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22866    }
22867  }
22868
  // Finally, try scalarizing a binop of two splats of the same source lane.
22869  if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
22870    return V;
22871
22872  return SDValue();
22873}
22874
22875SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22876                                   SDValue N2) {
22877  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
22878
22879  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22880                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());
22881
22882  // If we got a simplified select_cc node back from SimplifySelectCC, then
22883  // break it down into a new SETCC node, and a new SELECT node, and then return
22884  // the SELECT node, since we were called with a SELECT node.
22885  if (SCC.getNode()) {
22886    // Check to see if we got a select_cc back (to turn into setcc/select).
22887    // Otherwise, just return whatever node we got back, like fabs.
22888    if (SCC.getOpcode() == ISD::SELECT_CC) {
      // Preserve the original condition's fast-math/poison flags on both the
      // rebuilt compare and the select.
22889      const SDNodeFlags Flags = N0.getNode()->getFlags();
22891                                 N0.getValueType(),
22892                                 SCC.getOperand(0), SCC.getOperand(1),
22893                                 SCC.getOperand(4), Flags);
22894      AddToWorklist(SETCC.getNode());
22895      SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22896                                         SCC.getOperand(2), SCC.getOperand(3));
22897      SelectNode->setFlags(Flags);
22898      return SelectNode;
22899    }
22900
22901    return SCC;
22902  }
22903  return SDValue();
22904}
22905
22906/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22907/// being selected between, see if we can simplify the select. Callers of this
22908/// should assume that TheSelect is deleted if this returns true. As such, they
22909/// should return the appropriate thing (e.g. the node) back to the top-level of
22910/// the DAG combiner loop to avoid it being looked at.
22911 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22912                                     SDValue RHS) {
22913   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22914   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22915   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22916     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22917       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22918       SDValue Sqrt = RHS;
22919       ISD::CondCode CC;
22920       SDValue CmpLHS;
22921       const ConstantFPSDNode *Zero = nullptr;
22922
// Extract the compare pieces: SELECT_CC carries the compare inline
// (operands are lhs, rhs, trueval, falseval, condcode); SELECT/VSELECT
// carry a separate SETCC as operand 0.
22923       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22924         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22925         CmpLHS = TheSelect->getOperand(0);
22926         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22927       } else {
22928         // SELECT or VSELECT
22929         SDValue Cmp = TheSelect->getOperand(0);
22930         if (Cmp.getOpcode() == ISD::SETCC) {
22931           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22932           CmpLHS = Cmp.getOperand(0);
22933           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22934         }
22935       }
// Only fold when the value being compared against (+/-)0.0 with a
// signed/ordered/unordered 'lt' is the same value that feeds the fsqrt.
22936       if (Zero && Zero->isZero() &&
22937           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22938           CC == ISD::SETULT || CC == ISD::SETLT)) {
22939         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22940         CombineTo(TheSelect, Sqrt);
22941         return true;
22942       }
22943     }
22944   }
22945   // Cannot simplify select with vector condition
22946   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22947
22948   // If this is a select from two identical things, try to pull the operation
22949   // through the select.
22950   if (LHS.getOpcode() != RHS.getOpcode() ||
22951       !LHS.hasOneUse() || !RHS.hasOneUse())
22952     return false;
22953
22954   // If this is a load and the token chain is identical, replace the select
22955   // of two loads with a load through a select of the address to load from.
22956   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22957   // constants have been dropped into the constant pool.
22958   if (LHS.getOpcode() == ISD::LOAD) {
// NOTE(review): LLD/RLD below are the LoadSDNode* views of LHS/RHS
// (declared on lines not visible here) — confirm against full source.
22961
22962     // Token chains must be identical.
22963     if (LHS.getOperand(0) != RHS.getOperand(0) ||
22964         // Do not let this transformation reduce the number of volatile loads.
22965         // Be conservative for atomics for the moment
22966         // TODO: This does appear to be legal for unordered atomics (see D66309)
22967         !LLD->isSimple() || !RLD->isSimple() ||
22968         // FIXME: If either is a pre/post inc/dec load,
22969         // we'd need to split out the address adjustment.
22970         LLD->isIndexed() || RLD->isIndexed() ||
22971         // If this is an EXTLOAD, the VT's must match.
22972         LLD->getMemoryVT() != RLD->getMemoryVT() ||
22973         // If this is an EXTLOAD, the kind of extension must match.
22974         (LLD->getExtensionType() != RLD->getExtensionType() &&
22975          // The only exception is if one of the extensions is anyext.
22976          LLD->getExtensionType() != ISD::EXTLOAD &&
22977          RLD->getExtensionType() != ISD::EXTLOAD) ||
22978         // FIXME: this discards src value information. This is
22979         // over-conservative. It would be beneficial to be able to remember
22980         // both potential memory locations. Since we are discarding
22981         // src value info, don't do the transformation if the memory
22982         // locations are not in the default address space.
22983         LLD->getPointerInfo().getAddrSpace() != 0 ||
22984         RLD->getPointerInfo().getAddrSpace() != 0 ||
22985         // We can't produce a CMOV of a TargetFrameIndex since we won't
22986         // generate the address generation required.
22987         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22988         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22989         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22990                                       LLD->getBasePtr().getValueType()))
22991       return false;
22992
22993     // The loads must not depend on one another.
22994     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22995       return false;
22996
22997     // Check that the select condition doesn't reach either load. If so,
22998     // folding this will induce a cycle into the DAG. If not, this is safe to
22999     // xform, so create a select of the addresses.
23000
23003
23004     // Always fail if LLD and RLD are not independent. TheSelect is a
23005     // predecessor to all Nodes in question so we need not search past it.
23006
// Seed the predecessor search with TheSelect so the walk stops there.
23007     Visited.insert(TheSelect);
23008     Worklist.push_back(LLD);
23009     Worklist.push_back(RLD);
23010
23011     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
23012         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
23013       return false;
23014
23015     SDValue Addr;
23016     if (TheSelect->getOpcode() == ISD::SELECT) {
23017       // We cannot do this optimization if any pair of {RLD, LLD} is a
23018       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
23019       // Loads, we only need to check if CondNode is a successor to one of the
23020       // loads. We can further avoid this if there's no use of their chain
23021       // value.
23022       SDNode *CondNode = TheSelect->getOperand(0).getNode();
23023       Worklist.push_back(CondNode);
23024
23025       if ((LLD->hasAnyUseOfValue(1) &&
23026            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23027           (RLD->hasAnyUseOfValue(1) &&
23028            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23029         return false;
23030
// Addr = select(cond, LLD.baseptr, RLD.baseptr): pick the address instead
// of the loaded value.
23032                          LLD->getBasePtr().getValueType(),
23033                          TheSelect->getOperand(0), LLD->getBasePtr(),
23034                          RLD->getBasePtr());
23035     } else {  // Otherwise SELECT_CC
23036       // We cannot do this optimization if any pair of {RLD, LLD} is a
23037       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
23038       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
23039       // one of the loads. We can further avoid this if there's no use of their
23040       // chain value.
23041
23042       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
23043       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
23044       Worklist.push_back(CondLHS);
23045       Worklist.push_back(CondRHS);
23046
23047       if ((LLD->hasAnyUseOfValue(1) &&
23048            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23049           (RLD->hasAnyUseOfValue(1) &&
23050            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23051         return false;
23052
// Same idea for SELECT_CC: select_cc(lhs, rhs, LLD.baseptr, RLD.baseptr, cc).
23054                            LLD->getBasePtr().getValueType(),
23055                            TheSelect->getOperand(0),
23056                            TheSelect->getOperand(1),
23057                            LLD->getBasePtr(), RLD->getBasePtr(),
23058                            TheSelect->getOperand(4));
23059     }
23060
23061     SDValue Load;
23062     // It is safe to replace the two loads if they have different alignments,
23063     // but the new load must be the minimum (most restrictive) alignment of the
23064     // inputs.
23065     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
// Keep only MMO flags that hold for BOTH loads (invariance/dereferenceability
// of the merged load must be the conservative intersection).
23066     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
23067     if (!RLD->isInvariant())
23068       MMOFlags &= ~MachineMemOperand::MOInvariant;
23069     if (!RLD->isDereferenceable())
23070       MMOFlags &= ~MachineMemOperand::MODereferenceable;
23071     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
23072       // FIXME: Discards pointer and AA info.
23073       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
23074                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
23075                          MMOFlags);
23076     } else {
23077       // FIXME: Discards pointer and AA info.
// If one side is ANY_EXT, prefer the other side's (stricter) extension kind;
// the earlier checks guarantee the kinds are otherwise equal.
23078       Load = DAG.getExtLoad(
23079           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
23080                                                   : LLD->getExtensionType(),
23081           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
23082           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
23083     }
23084
23085     // Users of the select now use the result of the load.
23086     CombineTo(TheSelect, Load);
23087
23088     // Users of the old loads now use the new load's chain. We know the
23089     // old-load value is dead now.
23090     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
23091     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
23092     return true;
23093   }
23094
23095   return false;
23096 }
23097
23098/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
23099/// bitwise 'and'.
23100 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
23101                                             SDValue N1, SDValue N2, SDValue N3,
23102                                             ISD::CondCode CC) {
23103   // If this is a select where the false operand is zero and the compare is a
23104   // check of the sign bit, see if we can perform the "gzip trick":
23105   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
23106   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
23107   EVT XType = N0.getValueType();
23108   EVT AType = N2.getValueType();
23109   if (!isNullConstant(N3) || !XType.bitsGE(AType))
23110     return SDValue();
23111
23112   // If the comparison is testing for a positive value, we have to invert
23113   // the sign bit mask, so only do that transform if the target has a bitwise
23114   // 'and not' instruction (the invert is free).
23115   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
23116     // (X > -1) ? A : 0
23117     // (X > 0) ? X : 0 <-- This is canonical signed max.
23118     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
23119       return SDValue();
23120   } else if (CC == ISD::SETLT) {
23121     // (X < 0) ? A : 0
23122     // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
23123     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
23124       return SDValue();
23125   } else {
23126     return SDValue();
23127   }
23128
23129   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
23130   // constant.
23132   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
// Power-of-2 test via A & (A - 1) == 0 (note: also true for A == 0).
23133   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
// SRL moves the sign bit directly into A's single set-bit position,
// avoiding the full sign-smear + mask sequence.
23134     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
23135     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
23136       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23137       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
23138       AddToWorklist(Shift.getNode());
23139
23140       if (XType.bitsGT(AType)) {
23141         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23142         AddToWorklist(Shift.getNode());
23143       }
23144
23145       if (CC == ISD::SETGT)
23146         Shift = DAG.getNOT(DL, Shift, AType);
23147
23148       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23149     }
23150   }
23151
// General case: SRA by width-1 smears the sign bit into an all-ones or
// all-zeros mask, then AND selects A or 0.
23152   unsigned ShCt = XType.getSizeInBits() - 1;
23153   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
23154     return SDValue();
23155
23156   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23157   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
23158   AddToWorklist(Shift.getNode());
23159
23160   if (XType.bitsGT(AType)) {
23161     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23162     AddToWorklist(Shift.getNode());
23163   }
23164
// For SETGT the mask is inverted (see 'and not' discussion above).
23165   if (CC == ISD::SETGT)
23166     Shift = DAG.getNOT(DL, Shift, AType);
23167
23168   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23169 }
23170
23171// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
23172SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
23173 SDValue N0 = N->getOperand(0);
23174 SDValue N1 = N->getOperand(1);
23175 SDValue N2 = N->getOperand(2);
23176 EVT VT = N->getValueType(0);
23177 SDLoc DL(N);
23178
23179 unsigned BinOpc = N1.getOpcode();
23180 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
23181 return SDValue();
23182
23183 // The use checks are intentionally on SDNode because we may be dealing
23184 // with opcodes that produce more than one SDValue.
23185 // TODO: Do we really need to check N0 (the condition operand of the select)?
23186 // But removing that clause could cause an infinite loop...
23187 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
23188 return SDValue();
23189
23190 // Binops may include opcodes that return multiple values, so all values
23191 // must be created/propagated from the newly created binops below.
23192 SDVTList OpVTs = N1->getVTList();
23193
23194 // Fold select(cond, binop(x, y), binop(z, y))
23195 // --> binop(select(cond, x, z), y)
23196 if (N1.getOperand(1) == N2.getOperand(1)) {
23197 SDValue NewSel =
23198 DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
23199 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
23200 NewBinOp->setFlags(N1->getFlags());
23201 NewBinOp->intersectFlagsWith(N2->getFlags());
23202 return NewBinOp;
23203 }
23204
23205 // Fold select(cond, binop(x, y), binop(x, z))
23206 // --> binop(x, select(cond, y, z))
23207 // Second op VT might be different (e.g. shift amount type)
23208 if (N1.getOperand(0) == N2.getOperand(0) &&
23209 VT == N1.getOperand(1).getValueType() &&
23210 VT == N2.getOperand(1).getValueType()) {
23211 SDValue NewSel =
23212 DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
23213 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
23214 NewBinOp->setFlags(N1->getFlags());
23215 NewBinOp->intersectFlagsWith(N2->getFlags());
23216 return NewBinOp;
23217 }
23218
23219 // TODO: Handle isCommutativeBinOp patterns as well?
23220 return SDValue();
23221}
23222
23223// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
23224 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
// Transform (fneg/fabs (bitconvert x)) into integer bit-ops on x so the
// FP sign manipulation does not need a constant-pool load.
23225   SDValue N0 = N->getOperand(0);
23226   EVT VT = N->getValueType(0);
23227   bool IsFabs = N->getOpcode() == ISD::FABS;
23228   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
23229
// If the target does fneg/fabs for free, leave the node alone.
23230   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
23231     return SDValue();
23232
23233   SDValue Int = N0.getOperand(0);
23234   EVT IntVT = Int.getValueType();
23235
23236   // The operand to cast should be integer.
23237   if (!IntVT.isInteger() || IntVT.isVector())
23238     return SDValue();
23239
23240   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
23241   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
23242   APInt SignMask;
23243   if (N0.getValueType().isVector()) {
23244     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
23245     // 0x7f...) per element and splat it.
23247     if (IsFabs)
23248       SignMask = ~SignMask;
23249     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
23250   } else {
23251     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
23252     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
23253     if (IsFabs)
23254       SignMask = ~SignMask;
23255   }
23256   SDLoc DL(N0);
// fabs clears the sign bit (AND with ~sign); fneg flips it (XOR with sign).
23257   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
23258                     DAG.getConstant(SignMask, DL, IntVT));
23259   AddToWorklist(Int.getNode());
23260   return DAG.getBitcast(VT, Int);
23261 }
23262
23263/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
23264/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
23265/// in it. This may be a win when the constant is not otherwise available
23266/// because it replaces two constant pool loads with one.
23267 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
23268     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
23269     ISD::CondCode CC) {
// Replace two FP constant-pool loads selected by (N0 CC N1) with a single
// load whose address is offset by the select result. See the function-level
// comment above for the full rationale.
23270   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
23271     return SDValue();
23272
23273   // If we are before legalize types, we want the other legalization to happen
23274   // first (for example, to avoid messing with soft float).
23275   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
23276   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
23277   EVT VT = N2.getValueType();
23278   if (!TV || !FV || !TLI.isTypeLegal(VT))
23279     return SDValue();
23280
23281   // If a constant can be materialized without loads, this does not make sense.
23282   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
23283       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
23284       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
23285     return SDValue();
23286
23287   // If both constants have multiple uses, then we won't need to do an extra
23288   // load. The values are likely around in registers for other users.
23289   if (!TV->hasOneUse() && !FV->hasOneUse())
23290     return SDValue();
23291
// Array layout is {false-value, true-value}: element 1 is chosen when the
// compare is true (see the select of One/Zero below).
23292   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
23293                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
23294   Type *FPTy = Elts[0]->getType();
23295   const DataLayout &TD = DAG.getDataLayout();
23296
23297   // Create a ConstantArray of the two constants.
23299   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
23301   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
23302
23303   // Get offsets to the 0 and 1 elements of the array, so we can select between
23304   // them.
23305   SDValue Zero = DAG.getIntPtrConstant(0, DL);
23306   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
23307   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
23308   SDValue Cond =
23309       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
23310   AddToWorklist(Cond.getNode());
23311   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
23312   AddToWorklist(CstOffset.getNode());
// Final address: base of the two-element pool entry plus the selected offset.
23313   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
23314   AddToWorklist(CPIdx.getNode());
23315   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
23317                          DAG.getMachineFunction()), Alignment);
23318 }
23319
23320/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
23321/// where 'cond' is the comparison specified by CC.
23322 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
23323                                       SDValue N2, SDValue N3, ISD::CondCode CC,
23324                                       bool NotExtCompare) {
23325   // (x ? y : y) -> y.
23326   if (N2 == N3) return N2;
23327
23328   EVT CmpOpVT = N0.getValueType();
23330   EVT VT = N2.getValueType();
23331   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
23332   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23333   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
23334
23335   // Determine if the condition we're dealing with is constant.
23336   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
23337     AddToWorklist(SCC.getNode());
23338     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
23339       // fold select_cc true, x, y -> x
23340       // fold select_cc false, x, y -> y
23341       return !(SCCC->isZero()) ? N2 : N3;
23342     }
23343   }
23344
23345   if (SDValue V =
23347     return V;
23348
23349   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
23350     return V;
23351
23352   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
23353   // where y is has a single bit set.
23354   // A plaintext description would be, we can turn the SELECT_CC into an AND
23355   // when the condition can be materialized as an all-ones register.  Any
23356   // single bit-test can be materialized as an all-ones register with
23357   // shift-left and shift-right-arith.
23358   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
23359       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
23360     SDValue AndLHS = N0->getOperand(0);
23362     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
23363       // Shift the tested bit over the sign bit.
23364       const APInt &AndMask = ConstAndRHS->getAPIntValue();
23365       unsigned ShCt = AndMask.getBitWidth() - 1;
23366       if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
23367         SDValue ShlAmt =
23368           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
23369                           getShiftAmountTy(AndLHS.getValueType()));
23370         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
23371
23372         // Now arithmetic right shift it all the way over, so the result is
23373         // either all-ones, or zero.
23374         SDValue ShrAmt =
23375           DAG.getConstant(ShCt, SDLoc(Shl),
23377         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
23378
// Shr is all-ones when the tested bit is set (seteq ... 0 is false), so
// ANDing with N3 yields N3 or 0 as required.
23379         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
23380       }
23381     }
23382   }
23383
23384   // fold select C, 16, 0 -> shl C, 4
// Fold: true-arm power-of-2, false-arm 0. Swap: the mirrored pattern, which
// is handled by inverting the condition code first.
23385   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
23386   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
23387
23388   if ((Fold || Swap) &&
23389       TLI.getBooleanContents(CmpOpVT) ==
23391       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
23392
23393     if (Swap) {
23394       CC = ISD::getSetCCInverse(CC, CmpOpVT);
23395       std::swap(N2C, N3C);
23396     }
23397
23398     // If the caller doesn't want us to simplify this into a zext of a compare,
23399     // don't do it.
23400     if (NotExtCompare && N2C->isOne())
23401       return SDValue();
23402
23403     SDValue Temp, SCC;
23404     // zext (setcc n0, n1)
23405     if (LegalTypes) {
23406       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
23407       if (VT.bitsLT(SCC.getValueType()))
23408         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
23409       else
23410         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23411     } else {
23412       SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
23413       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23414     }
23415
23416     AddToWorklist(SCC.getNode());
23417     AddToWorklist(Temp.getNode());
23418
23419     if (N2C->isOne())
23420       return Temp;
23421
23422     unsigned ShCt = N2C->getAPIntValue().logBase2();
23423     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
23424       return SDValue();
23425
23426     // shl setcc result by log2 n2c
23427     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
23428                        DAG.getConstant(ShCt, SDLoc(Temp),
23430   }
23431
23432   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
23433   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
23434   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
23435   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
23436   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
23437   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
23438   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
23439   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
23440   if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
23441     SDValue ValueOnZero = N2;
23442     SDValue Count = N3;
23443     // If the condition is NE instead of E, swap the operands.
23444     if (CC == ISD::SETNE)
23445       std::swap(ValueOnZero, Count);
23446     // Check if the value on zero is a constant equal to the bits in the type.
23448       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
23449         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
23450         // legal, combine to just cttz.
23451         if ((Count.getOpcode() == ISD::CTTZ ||
23452              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
23453             N0 == Count.getOperand(0) &&
23454             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
23455           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
23456         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
23457         // legal, combine to just ctlz.
23458         if ((Count.getOpcode() == ISD::CTLZ ||
23459              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
23460             N0 == Count.getOperand(0) &&
23461             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
23462           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
23463       }
23464     }
23465   }
23466
23467   // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
23468   // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
23469   if (!NotExtCompare && N1C && N2C && N3C &&
23470       N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
23471       ((N1C->isAllOnes() && CC == ISD::SETGT) ||
23472        (N1C->isZero() && CC == ISD::SETLT)) &&
23473       !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
// ashr by width-1 gives all-ones for negative X, all-zeros otherwise; XOR
// with one constant then produces either C or ~C as required.
23474     SDValue ASR = DAG.getNode(
23475         ISD::SRA, DL, CmpOpVT, N0,
23476         DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
23477     return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
23478                        DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
23479   }
23480
23481   if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23482     return S;
23483   if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23484     return S;
23485
23486   return SDValue();
23487 }
23488
23489/// This is a stub for TargetLowering::SimplifySetCC.
23490 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
23491                                    ISD::CondCode Cond, const SDLoc &DL,
23492                                    bool foldBooleans) {
// Delegate to TargetLowering::SimplifySetCC, passing a DAGCombinerInfo
// wrapper so target hooks can see the current combine level and report
// new nodes back to this combiner.
23494       DagCombineInfo(DAG, Level, false, this);
23495   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
23496 }
23497
23498/// Given an ISD::SDIV node expressing a divide by constant, return
23499/// a DAG expression to select that will generate the same value by multiplying
23500/// by a magic number.
23501/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23502 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
23503   // when optimising for minimum size, we don't want to expand a div to a mul
23504   // and a shift.
23506     return SDValue();
23507
// Let the target build the magic-number multiply sequence; queue every node
// it created so they get combined too.
23509   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
23510     for (SDNode *N : Built)
23511       AddToWorklist(N);
23512     return S;
23513   }
23514
23515   return SDValue();
23516 }
23517
23518/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
23519/// DAG expression that will generate the same value by right shifting.
23520 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
// Expand sdiv-by-power-of-2 (splat constants included) into right shifts.
23521   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
23522   if (!C)
23523     return SDValue();
23524
23525   // Avoid division by zero.
23526   if (C->isZero())
23527     return SDValue();
23528
// The target builds the shift sequence; queue the nodes it created.
23530   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
23531     for (SDNode *N : Built)
23532       AddToWorklist(N);
23533     return S;
23534   }
23535
23536   return SDValue();
23537 }
23538
23539/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
23540/// expression that will generate the same value by multiplying by a magic
23541/// number.
23542/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23543 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
23544   // when optimising for minimum size, we don't want to expand a div to a mul
23545   // and a shift.
23547     return SDValue();
23548
// Let the target build the magic-number multiply sequence; queue every node
// it created so they get combined too.
23550   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
23551     for (SDNode *N : Built)
23552       AddToWorklist(N);
23553     return S;
23554   }
23555
23556   return SDValue();
23557 }
23558
23559/// Determines the LogBase2 value for a non-null input value using the
23560/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
23561SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
23562 EVT VT = V.getValueType();
23563 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
23564 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
23565 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
23566 return LogBase2;
23567}
23568
23569/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23570/// For the reciprocal, we need to find the zero of the function:
23571/// F(X) = 1/X - A [which has a zero at X = 1/A]
23572/// =>
23573/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
23574/// does not require additional intermediate precision]
23575/// For the last iteration, put numerator N into it to gain more precision:
23576/// Result = N X_i + X_i (N - N A X_i)
23577 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
23578                                       SDNodeFlags Flags) {
// Build N/Op as N * (reciprocal estimate of Op), refined by Newton-Raphson:
//   X_{i+1} = X_i + X_i (1 - Op * X_i)
// with the numerator N folded into the final iteration for extra precision.
23579   if (LegalDAG)
23580     return SDValue();
23581
23582   // TODO: Handle extended types?
23583   EVT VT = Op.getValueType();
23584   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23585       VT.getScalarType() != MVT::f64)
23586     return SDValue();
23587
23588   // If estimates are explicitly disabled for this function, we're done.
23590   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
23591   if (Enabled == TLI.ReciprocalEstimate::Disabled)
23592     return SDValue();
23593
23594   // Estimates may be explicitly enabled for this type with a custom number of
23595   // refinement steps.
23596   int Iterations = TLI.getDivRefinementSteps(VT, MF);
23597   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
23598     AddToWorklist(Est.getNode());
23599
23600     SDLoc DL(Op);
23601     if (Iterations) {
23602       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
23603
23604       // Newton iterations: Est = Est + Est (N - Arg * Est)
23605       // If this is the last iteration, also multiply by the numerator.
23606       for (int i = 0; i < Iterations; ++i) {
23607         SDValue MulEst = Est;
23608
// Last iteration: fold the numerator in, computing N*Est directly.
23609         if (i == Iterations - 1) {
23610           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
23611           AddToWorklist(MulEst.getNode());
23612         }
23613
23614         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
23615         AddToWorklist(NewEst.getNode());
23616
// Residual: (1 - Op*Est) normally, (N - Op*N*Est) on the last iteration.
23617         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
23618                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
23619         AddToWorklist(NewEst.getNode());
23620
23621         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23622         AddToWorklist(NewEst.getNode());
23623
23624         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
23625         AddToWorklist(Est.getNode());
23626       }
23627     } else {
23628       // If no iterations are available, multiply with N.
23629       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
23630       AddToWorklist(Est.getNode());
23631     }
23632
23633     return Est;
23634   }
23635
23636   return SDValue();
23637 }
23638
23639/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23640/// For the reciprocal sqrt, we need to find the zero of the function:
23641/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23642/// =>
23643/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
23644/// As a result, we precompute A/2 prior to the iteration loop.
23645SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
23646 unsigned Iterations,
23647 SDNodeFlags Flags, bool Reciprocal) {
23648 EVT VT = Arg.getValueType();
23649 SDLoc DL(Arg);
23650 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
23651
23652 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
23653 // this entire sequence requires only one FP constant.
23654 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
23655 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
23656
23657 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
23658 for (unsigned i = 0; i < Iterations; ++i) {
23659 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
23660 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
23661 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
23662 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23663 }
23664
23665 // If non-reciprocal square root is requested, multiply the result by Arg.
23666 if (!Reciprocal)
23667 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
23668
23669 return Est;
23670}
23671
23672/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23673/// For the reciprocal sqrt, we need to find the zero of the function:
23674/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23675/// =>
23676/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
23677SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
23678 unsigned Iterations,
23679 SDNodeFlags Flags, bool Reciprocal) {
23680 EVT VT = Arg.getValueType();
23681 SDLoc DL(Arg);
23682 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
23683 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
23684
23685 // This routine must enter the loop below to work correctly
23686 // when (Reciprocal == false).
23687 assert(Iterations > 0);
23688
23689 // Newton iterations for reciprocal square root:
23690 // E = (E * -0.5) * ((A * E) * E + -3.0)
23691 for (unsigned i = 0; i < Iterations; ++i) {
23692 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
23693 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
23694 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
23695
23696 // When calculating a square root at the last iteration build:
23697 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
23698 // (notice a common subexpression)
23699 SDValue LHS;
23700 if (Reciprocal || (i + 1) < Iterations) {
23701 // RSQRT: LHS = (E * -0.5)
23702 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
23703 } else {
23704 // SQRT: LHS = (A * E) * -0.5
23705 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
23706 }
23707
23708 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
23709 }
23710
23711 return Est;
23712}
23713
23714/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
23715/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
23716/// Op can be zero.
23717 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
23718                                            bool Reciprocal) {
// Build an estimate of sqrt(Op) (Reciprocal==false) or 1/sqrt(Op)
// (Reciprocal==true) using a target-supplied initial estimate plus
// Newton-Raphson refinement. Returns SDValue() when estimates are
// unavailable or disabled.
23719   if (LegalDAG)
23720     return SDValue();
23721
23722   // TODO: Handle extended types?
23723   EVT VT = Op.getValueType();
23724   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23725       VT.getScalarType() != MVT::f64)
23726     return SDValue();
23727
23728   // If estimates are explicitly disabled for this function, we're done.
23730   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
23731   if (Enabled == TLI.ReciprocalEstimate::Disabled)
23732     return SDValue();
23733
23734   // Estimates may be explicitly enabled for this type with a custom number of
23735   // refinement steps.
23736   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
23737
// The target also chooses which refinement flavor (one- or two-constant
// Newton-Raphson) fits its estimate instruction.
23738   bool UseOneConstNR = false;
23739   if (SDValue Est =
23740       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
23741                           Reciprocal)) {
23742     AddToWorklist(Est.getNode());
23743
23744     if (Iterations)
23746                 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
23747                 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
23748     if (!Reciprocal) {
23749       SDLoc DL(Op);
23750       // Try the target specific test first.
23751       SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
23752
23753       // The estimate is now completely wrong if the input was exactly 0.0 or
23754       // possibly a denormal. Force the answer to 0.0 or value provided by
23755       // target for those cases.
23756       Est = DAG.getNode(
23757           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
23759     }
23760     return Est;
23761   }
23762
23763   return SDValue();
23764 }
23765
23766SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23767 return buildSqrtEstimateImpl(Op, Flags, true);
23768}
23769
23770SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23771 return buildSqrtEstimateImpl(Op, Flags, false);
23772}
23773
23774/// Return true if there is any possibility that the two addresses overlap.
23775bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
23776
23777 struct MemUseCharacteristics {
23778 bool IsVolatile;
23779 bool IsAtomic;
23781 int64_t Offset;
23782 Optional<int64_t> NumBytes;
23783 MachineMemOperand *MMO;
23784 };
23785
23786 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
23787 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
23788 int64_t Offset = 0;
23789 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
23790 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
23791 ? C->getSExtValue()
23792 : (LSN->getAddressingMode() == ISD::PRE_DEC)
23793 ? -1 * C->getSExtValue()
23794 : 0;
23795 uint64_t Size =
23796 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
23797 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
23798 Offset /*base offset*/,
23800 LSN->getMemOperand()};
23801 }
23802 if (const auto *LN = cast<LifetimeSDNode>(N))
23803 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
23804 (LN->hasOffset()) ? LN->getOffset() : 0,
23805 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
23806 : Optional<int64_t>(),
23808 // Default.
23809 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
23810 (int64_t)0 /*offset*/,
23811 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
23812 };
23813
23814 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
23815 MUC1 = getCharacteristics(Op1);
23816
23817 // If they are to the same address, then they must be aliases.
23818 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
23819 MUC0.Offset == MUC1.Offset)
23820 return true;
23821
23822 // If they are both volatile then they cannot be reordered.
23823 if (MUC0.IsVolatile && MUC1.IsVolatile)
23824 return true;
23825
23826 // Be conservative about atomics for the moment
23827 // TODO: This is way overconservative for unordered atomics (see D66309)
23828 if (MUC0.IsAtomic && MUC1.IsAtomic)
23829 return true;
23830
23831 if (MUC0.MMO && MUC1.MMO) {
23832 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23833 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23834 return false;
23835 }
23836
23837 // Try to prove that there is aliasing, or that there is no aliasing. Either
23838 // way, we can return now. If nothing can be proved, proceed with more tests.
23839 bool IsAlias;
23840 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23841 DAG, IsAlias))
23842 return IsAlias;
23843
23844 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23845 // either are not known.
23846 if (!MUC0.MMO || !MUC1.MMO)
23847 return true;
23848
23849 // If one operation reads from invariant memory, and the other may store, they
23850 // cannot alias. These should really be checking the equivalent of mayWrite,
23851 // but it only matters for memory nodes other than load /store.
23852 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23853 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23854 return false;
23855
23856 // If we know required SrcValue1 and SrcValue2 have relatively large
23857 // alignment compared to the size and offset of the access, we may be able
23858 // to prove they do not alias. This check is conservative for now to catch
23859 // cases created by splitting vector types, it only works when the offsets are
23860 // multiples of the size of the data.
23861 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23862 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23863 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23864 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23865 auto &Size0 = MUC0.NumBytes;
23866 auto &Size1 = MUC1.NumBytes;
23868 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23869 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23870 SrcValOffset1 % *Size1 == 0) {
23871 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23872 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23873
23874 // There is no overlap between these relatively aligned accesses of
23875 // similar size. Return no alias.
23876 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23877 return false;
23878 }
23879
23880 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
23882 : DAG.getSubtarget().useAA();
23883#ifndef NDEBUG
23884 if (CombinerAAOnlyFunc.getNumOccurrences() &&
23886 UseAA = false;
23887#endif
23888
23889 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23890 Size0.hasValue() && Size1.hasValue()) {
23891 // Use alias analysis information.
23892 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23893 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23894 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
23895 if (AA->isNoAlias(
23896 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23897 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23898 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23899 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23900 return false;
23901 }
23902
23903 // Otherwise we have to assume they alias.
23904 return true;
23905}
23906
23907/// Walk up chain skipping non-aliasing memory nodes,
23908/// looking for aliasing nodes and adding them to the Aliases vector.
23909void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
23910 SmallVectorImpl<SDValue> &Aliases) {
23911 SmallVector<SDValue, 8> Chains; // List of chains to visit.
23912 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
23913
23914 // Get alias information for node.
23915 // TODO: relax aliasing for unordered atomics (see D66309)
23916 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
23917
23918 // Starting off.
23919 Chains.push_back(OriginalChain);
23920 unsigned Depth = 0;
23921
23922 // Attempt to improve chain by a single step
23923 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
23924 switch (C.getOpcode()) {
23925 case ISD::EntryToken:
23926 // No need to mark EntryToken.
23927 C = SDValue();
23928 return true;
23929 case ISD::LOAD:
23930 case ISD::STORE: {
23931 // Get alias information for C.
23932 // TODO: Relax aliasing for unordered atomics (see D66309)
23933 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
23934 cast<LSBaseSDNode>(C.getNode())->isSimple();
23935 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
23936 // Look further up the chain.
23937 C = C.getOperand(0);
23938 return true;
23939 }
23940 // Alias, so stop here.
23941 return false;
23942 }
23943
23944 case ISD::CopyFromReg:
23945 // Always forward past past CopyFromReg.
23946 C = C.getOperand(0);
23947 return true;
23948
23950 case ISD::LIFETIME_END: {
23951 // We can forward past any lifetime start/end that can be proven not to
23952 // alias the memory access.
23953 if (!mayAlias(N, C.getNode())) {
23954 // Look further up the chain.
23955 C = C.getOperand(0);
23956 return true;
23957 }
23958 return false;
23959 }
23960 default:
23961 return false;
23962 }
23963 };
23964
23965 // Look at each chain and determine if it is an alias. If so, add it to the
23966 // aliases list. If not, then continue up the chain looking for the next
23967 // candidate.
23968 while (!Chains.empty()) {
23969 SDValue Chain = Chains.pop_back_val();
23970
23971 // Don't bother if we've seen Chain before.
23972 if (!Visited.insert(Chain.getNode()).second)
23973 continue;
23974
23975 // For TokenFactor nodes, look at each operand and only continue up the
23976 // chain until we reach the depth limit.
23977 //
23978 // FIXME: The depth check could be made to return the last non-aliasing
23979 // chain we found before we hit a tokenfactor rather than the original
23980 // chain.
23981 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
23982 Aliases.clear();
23983 Aliases.push_back(OriginalChain);
23984 return;
23985 }
23986
23987 if (Chain.getOpcode() == ISD::TokenFactor) {
23988 // We have to check each of the operands of the token factor for "small"
23989 // token factors, so we queue them up. Adding the operands to the queue
23990 // (stack) in reverse order maintains the original order and increases the
23991 // likelihood that getNode will find a matching token factor (CSE.)
23992 if (Chain.getNumOperands() > 16) {
23993 Aliases.push_back(Chain);
23994 continue;
23995 }
23996 for (unsigned n = Chain.getNumOperands(); n;)
23997 Chains.push_back(Chain.getOperand(--n));
23998 ++Depth;
23999 continue;
24000 }
24001 // Everything else
24002 if (ImproveChain(Chain)) {
24003 // Updated Chain Found, Consider new chain if one exists.
24004 if (Chain.getNode())
24005 Chains.push_back(Chain);
24006 ++Depth;
24007 continue;
24008 }
24009 // No Improved Chain Possible, treat as Alias.
24010 Aliases.push_back(Chain);
24011 }
24012}
24013
24014/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
24015/// (aliasing node.)
24016SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
24017 if (OptLevel == CodeGenOpt::None)
24018 return OldChain;
24019
24020 // Ops for replacing token factor.
24022
24023 // Accumulate all the aliases to this node.
24024 GatherAllAliases(N, OldChain, Aliases);
24025
24026 // If no operands then chain to entry token.
24027 if (Aliases.size() == 0)
24028 return DAG.getEntryNode();
24029
24030 // If a single operand then chain to it. We don't need to revisit it.
24031 if (Aliases.size() == 1)
24032 return Aliases[0];
24033
24034 // Construct a custom tailored token factor.
24035 return DAG.getTokenFactor(SDLoc(N), Aliases);
24036}
24037
namespace {
// TODO: Replace with std::monostate when we move to C++17.
// Unit type used as the (empty) payload of the IntervalMap below; equality
// is trivially true since all units are identical.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
24044
24045// This function tries to collect a bunch of potentially interesting
24046// nodes to improve the chains of, all at once. This might seem
24047// redundant, as this function gets called when visiting every store
24048// node, so why not let the work be done on each store as it's visited?
24049//
24050// I believe this is mainly important because mergeConsecutiveStores
24051// is unable to deal with merging stores of different sizes, so unless
24052// we improve the chains of all the potential candidates up-front
24053// before running mergeConsecutiveStores, it might only see some of
24054// the nodes that will eventually be candidates, and then not be able
24055// to go from a partially-merged state to the desired final
24056// fully-merged state.
24057
24058bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
24061 // Intervals records which offsets from BaseIndex have been covered. In
24062 // the common case, every store writes to the immediately previous address
24063 // space and thus merged with the previous interval at insertion time.
24064
24065 using IMap =
24067 IMap::Allocator A;
24068 IMap Intervals(A);
24069
24070 // This holds the base pointer, index, and the offset in bytes from the base
24071 // pointer.
24073
24074 // We must have a base and an offset.
24075 if (!BasePtr.getBase().getNode())
24076 return false;
24077
24078 // Do not handle stores to undef base pointers.
24079 if (BasePtr.getBase().isUndef())
24080 return false;
24081
24082 // Do not handle stores to opaque types
24083 if (St->getMemoryVT().isZeroSized())
24084 return false;
24085
24086 // BaseIndexOffset assumes that offsets are fixed-size, which
24087 // is not valid for scalable vectors where the offsets are
24088 // scaled by `vscale`, so bail out early.
24089 if (St->getMemoryVT().isScalableVector())
24090 return false;
24091
24092 // Add ST's interval.
24093 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
24094
24095 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
24096 if (Chain->getMemoryVT().isScalableVector())
24097 return false;
24098
24099 // If the chain has more than one use, then we can't reorder the mem ops.
24100 if (!SDValue(Chain, 0)->hasOneUse())
24101 break;
24102 // TODO: Relax for unordered atomics (see D66309)
24103 if (!Chain->isSimple() || Chain->isIndexed())
24104 break;
24105
24106 // Find the base pointer and offset for this memory node.
24107 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
24108 // Check that the base pointer is the same as the original one.
24109 int64_t Offset;
24110 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
24111 break;
24112 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
24113 // Make sure we don't overlap with other intervals by checking the ones to
24114 // the left or right before inserting.
24115 auto I = Intervals.find(Offset);
24116 // If there's a next interval, we should end before it.
24117 if (I != Intervals.end() && I.start() < (Offset + Length))
24118 break;
24119 // If there's a previous interval, we should start after it.
24120 if (I != Intervals.begin() && (--I).stop() <= Offset)
24121 break;
24122 Intervals.insert(Offset, Offset + Length, Unit);
24123
24124 ChainedStores.push_back(Chain);
24125 STChain = Chain;
24126 }
24127
24128 // If we didn't find a chained store, exit.
24129 if (ChainedStores.size() == 0)
24130 return false;
24131
24132 // Improve all chained stores (St and ChainedStores members) starting from
24133 // where the store chain ended and return single TokenFactor.
24134 SDValue NewChain = STChain->getChain();
24136 for (unsigned I = ChainedStores.size(); I;) {
24137 StoreSDNode *S = ChainedStores[--I];
24140 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
24141 TFOps.push_back(SDValue(S, 0));
24142 ChainedStores[I] = S;
24143 }
24144
24145 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
24147 SDValue NewST;
24148 if (St->isTruncatingStore())
24149 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
24150 St->getBasePtr(), St->getMemoryVT(),
24151 St->getMemOperand());
24152 else
24153 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
24154 St->getBasePtr(), St->getMemOperand());
24155
24156 TFOps.push_back(NewST);
24157
24158 // If we improved every element of TFOps, then we've lost the dependence on
24159 // NewChain to successors of St and we need to add it back to TFOps. Do so at
24160 // the beginning to keep relative order consistent with FindBetterChains.
24161 auto hasImprovedChain = [&](SDValue ST) -> bool {
24162 return ST->getOperand(0) != NewChain;
24163 };
24165 if (AddNewChain)
24166 TFOps.insert(TFOps.begin(), NewChain);
24167
24169 CombineTo(St, TF);
24170
24171 // Add TF and its operands to the worklist.
24172 AddToWorklist(TF.getNode());
24173 for (const SDValue &Op : TF->ops())
24174 AddToWorklist(Op.getNode());
24175 AddToWorklist(STChain);
24176 return true;
24177}
24178
24179bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
24180 if (OptLevel == CodeGenOpt::None)
24181 return false;
24182
24184
24185 // We must have a base and an offset.
24186 if (!BasePtr.getBase().getNode())
24187 return false;
24188
24189 // Do not handle stores to undef base pointers.
24190 if (BasePtr.getBase().isUndef())
24191 return false;
24192
24193 // Directly improve a chain of disjoint stores starting at St.
24195 return true;
24196
24197 // Improve St's Chain..
24198 SDValue BetterChain = FindBetterChain(St, St->getChain());
24199 if (St->getChain() != BetterChain) {
24201 return true;
24202 }
24203 return false;
24204}
24205
24206/// This is the entry point for the file.
24208 CodeGenOpt::Level OptLevel) {
24209 /// This is the main entry point to this class.
24210 DAGCombiner(*this, AA, OptLevel).Run(Level);
24211}
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static uint64_t * getMemory(unsigned numWords)
A utility function for allocating memory and checking for allocation failure.
Definition APInt.cpp:43
This file implements a class to represent arbitrary precision integral constant values and operations...
@ Scaled
This file contains the simple types necessary to represent the attributes associated with functions a...
for(auto &MBB :MF)
SmallVector< MachineOperand, 4 > Cond
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< ShadowStackGC > C("shadow-stack", "Very portable GC for uncooperative code generators")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition Compiler.h:294
return Changed
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static const Optional< ByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, bool Root=false)
Recursively traverses the expression calculating the origin of the requested byte of the given value.
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propapagtion pattern try to break it up to generate somet...
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI)
Return true if 'Use' is a load or a store that uses N as its base pointer and that N may be folded in...
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG)
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned)
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index, bool Scaled, SelectionDAG &DAG)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue Carry0, SDValue Carry1, SDNode *N)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
Generate Min/Max node.
static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy)
static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
PropagateLiveness: Given that RA is a live value, propagate its liveness to any other values it uses (according to Uses). void DeadArgumentEliminationPass
static ManagedStatic< DebugCounter > DC
#define LLVM_DEBUG(X)
Definition Debug.h:101
This file defines the DenseMap class.
uint64_t Offset
uint64_t Addr
uint32_t Index
uint64_t Size
Optional< std::vector< StOtherPiece > > Other
Definition ELFYAML.cpp:1202
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
iv Induction Variable Users
Definition IVUsers.cpp:52
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static void removeFromWorklist(Instruction *I, std::vector< Instruction * > &Worklist)
Remove all instances of I from the worklist vector specified.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
This file provides None, an enumerator for use in implicit constructors of various (usually templated) classes to indicate that they do not contain a value.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
This file provides Optional, a template class modeled in the spirit of OCaml's 'opt' variant.
if(VerifyEach)
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
static StringRef getExtensionType(StringRef Ext)
#define ROTR(x, n)
Definition SHA256.cpp:36
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics in a consistent manner.
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
static unsigned getScalarSizeInBits(Type *Ty)
This file describes how to lower LLVM code to machine code.
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:199
vector combine
static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, MachineDominatorTree &MDT, LiveIntervals &LIS)
static constexpr int Concat[]
Value * RHS
Value * LHS
xray Insert XRay ops
APInt bitcastToAPInt() const
Definition APFloat.h:1129
Class for arbitrary precision integers.
Definition APInt.h:75
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:214
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1748
APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:950
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:209
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1467
APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:881
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:347
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:241
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1656
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1412
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1044
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:595
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition APInt.h:1539
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition APInt.h:1500
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:456
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:452
bool isMask(unsigned numBits) const
Definition APInt.h:469
APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:926
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:289
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:279
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:177
unsigned countTrailingOnes() const
Count the number of trailing one bits.
Definition APInt.h:1555
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:269
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:222
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1154
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition Type.cpp:638
static bool computeAliasing(const SDNode *Op0, const Optional< int64_t > NumBytes0, const SDNode *Op1, const Optional< int64_t > NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ISD::CondCode get() const
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
const APFloat & getValueAPF() const
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:257
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:113
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:244
bool isBigEndian() const
Definition DataLayout.h:245
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment padding.
Definition DataLayout.h:506
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
bool isScalar() const
Counting predicates.
Definition TypeSize.h:395
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:658
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:316
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:624
This class is used to form a handle around another node that is persistent and is updated across invocations of replaceAllUsesWith on its operand.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition TypeSize.h:298
static ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:283
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
Machine Value Type.
SimpleValueType SimpleTy
static MVT getIntegerVT(unsigned BitWidth)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
This class is used to represent an MGATHER node.
This is a base class used to represent MGATHER and MSCATTER nodes.
This class is used to represent an MLOAD node.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
static uint64_t getSizeOrUnknown(const TypeSize &T)
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process.
virtual bool disableGenericCombines(CodeGenOpt::Level OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
bool isKnownNeverZero(SDValue Op) const
Test whether the given SDValue is known to contain non-zero value(s).
const TargetSubtargetInfo & getSubtarget() const
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source vector.
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new memory op's chain.
void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To, unsigned Num)
Like ReplaceAllUsesOfValueWith, but for multiple values at once.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SDValue.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands, and they produce a value AND a token chain.
bool shouldOptForSize() const
OverflowKind computeOverflowKind(SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 node can overflow.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not necessarily identical pieces.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits)
See if the specified operand can be simplified with the knowledge that only the bits specified by DemandedBits are used.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target instruction selector, as indicated by the TargetLowering object.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swapped operands.
bool isKnownToBeAPowerOfTwo(SDValue Val) const
Test if the given value is known to have exactly one bit set.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false, does not have undef bits.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or truncating it.
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate for the target's BooleanContent for type OpVT or truncating it.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
SDValue getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncating it.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an SDValue.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
void Combine(CombineLevel Level, AAResults *AA, CodeGenOpt::Level OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together, or eliminating superfluous nodes.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or truncating it.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for split-up values.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is small.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:308
This class is used to represent ISD::STORE nodes.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isMulAddWithConstProfitable(const SDValue &AddNode, const SDValue &ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations, those with specific masks.
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool hasBitPreservingFPLogic(EVT VT) const
Return true if it is safe to transform an integer-domain bitwise operation into the equivalent floati...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOpt::Level OptLevel) const
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
virtual bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of a non-zero vector constant with the give...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool isNarrowingProfitable(EVT, EVT) const
Return true if it's profitable to narrow operations of type VT1 to VT2.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount though its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
ScalarTy getFixedSize() const
Definition TypeSize.h:425
static TypeSize Fixed(ScalarTy MinVal)
Definition TypeSize.h:422
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
A Use represents the edge between a Value definition and its users.
Definition Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:72
Value * getOperand(unsigned i) const
Definition User.h:169
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:74
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:434
use_iterator use_begin()
Definition Value.h:360
bool use_empty() const
Definition Value.h:344
iterator_range< use_iterator > uses()
Definition Value.h:376
Implementation for an ilist node.
Definition ilist_node.h:40
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition APInt.h:2133
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition APInt.h:2138
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:80
BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI)
Returns a BaseIndexOffset which describes the pointer in Ptr.
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:702
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:236
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:675
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:462
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:44
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:250
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:535
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:666
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:354
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:269
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:470
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition ISDOpcodes.h:925
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:360
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:732
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:466
@ GlobalAddress
Definition ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:739
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:519
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:377
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:640
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:769
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:255
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition ISDOpcodes.h:862
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:852
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:229
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:726
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:583
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:674
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:290
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:710
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition ISDOpcodes.h:870
@ BR_CC
BR_CC - Conditional branch.
Definition ISDOpcodes.h:967
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:314
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:609
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:336
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:679
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:211
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:222
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:590
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:208
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:310
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:614
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:657
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:563
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:549
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition ISDOpcodes.h:909
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:511
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:729
@ TargetConstantFP
Definition ISDOpcodes.h:159
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:694
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition ISDOpcodes.h:902
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:346
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:318
@ TargetFrameIndex
Definition ISDOpcodes.h:166
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:758
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:747
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:626
@ LIFETIME_START
This corresponds to the llvm.lifetime.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:837
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:688
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition ISDOpcodes.h:915
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:785
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:632
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition ISDOpcodes.h:260
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:279
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:387
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:500
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:818
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:780
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:804
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:735
@ BRCOND
BRCOND - Conditional branch.
Definition ISDOpcodes.h:960
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:476
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:327
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:300
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:491
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant BUI...
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
Optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
@ UNSIGNED_UNSCALED
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
@ VecLoad
Definition NVPTX.h:85
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
Definition MathExtras.h:57
DiagnosticInfoOptimizationBase::Argument NV
This file defines the SmallVector class.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:266
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:683
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:609
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:339
void stable_sort(R &&Range)
Definition STLExtras.h:1719
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1604
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1584
bool operator==(uint64_t V1, const APInt &V2)
Definition APInt.h:1986
bool isConstantOrConstantVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a build vector of constant integers.
Definition Utils.cpp:1107
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:455
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:138
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands are the same.
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
bool getAlign(const Function &F, unsigned index, unsigned &align)
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:1988
bool operator>=(int64_t V1, const APSInt &V2)
Definition APSInt.h:338
std::string & operator+=(std::string &buffer, StringRef string)
Definition StringRef.h:960
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:496
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition STLExtras.h:359
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:602
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:702
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
unsigned M1(unsigned Val)
Definition VE.h:371
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1591
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:596
bool operator>(int64_t V1, const APSInt &V2)
Definition APSInt.h:340
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition MathExtras.h:225
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:491
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition MathExtras.h:156
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1598
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition ArrayRef.h:474
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1529
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition MathExtras.h:525
bool isNullOrNullSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
@ Z
zlib style compression
CombineLevel
Definition DAGCombine.h:15
@ AfterLegalizeDAG
Definition DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
@ AfterLegalizeTypes
Definition DAGCombine.h:17
bool is_splat(R &&Range)
Wrapper function around std::equal to detect if all elements in a container are the same.
Definition STLExtras.h:1751
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1667
unsigned M0(unsigned Val)
Definition VE.h:370
std::enable_if_t<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > cast(const Y &Val)
Definition Casting.h:254
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1674
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition STLExtras.h:1649
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:211
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:207
bool operator<=(int64_t V1, const APSInt &V2)
Definition APSInt.h:337
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:651
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:189
static unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:211
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:205
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:363
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:130
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:257
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:273
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:140
ElementCount getVectorElementCount() const
Definition ValueTypes.h:323
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:341
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:216
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:332
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:353
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:289
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:349
bool isFixedLengthVector() const
Definition ValueTypes.h:165
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:155
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:296
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition ValueTypes.h:229
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:161
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:301
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:150
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:309
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:145
Helper struct to store a base, index and offset that forms an address.
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:226
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:298
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:78
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:109
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoUnsignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...